btrfs-progs: lowmem check: Fix false alert about file extent interrupt
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases the progress reporter can announce.  The values are used as indexes
 * into the message table in print_status_check(), which has no entry for
 * TASK_NOTHING, so that enumerator must stay last.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* sentinel: keep as the final enumerator */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Checker implementations selectable by name (see parse_check_mode()). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,	/* what parse_check_mode() returns for an unknown name */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; each macro is a distinct single bit, starting at bit 1. */
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM has no reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* tree traversal is complete */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but does not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/* Reference error bits, decoded into text by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/* Bits of inode_record::errors, decoded into text by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.  Every macro must be a distinct
 * bit so that two different errors can be told apart once callers do start
 * to look at them.
 *
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH;
 * it now has the first free bit so that all other values stay unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize, root->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (cur + sizeof(*di) + name_len > total ||
1516                     name_len > BTRFS_NAME_LEN) {
1517                         error = REF_ERR_NAME_TOO_LONG;
1518
1519                         if (cur + sizeof(*di) > total)
1520                                 break;
1521                         len = min_t(u32, total - cur - sizeof(*di),
1522                                     BTRFS_NAME_LEN);
1523                 } else {
1524                         len = name_len;
1525                         error = 0;
1526                 }
1527
1528                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1529
1530                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1531                         add_inode_backref(inode_cache, location.objectid,
1532                                           key->objectid, key->offset, namebuf,
1533                                           len, filetype, key->type, error);
1534                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1535                         add_inode_backref(root_cache, location.objectid,
1536                                           key->objectid, key->offset,
1537                                           namebuf, len, filetype,
1538                                           key->type, error);
1539                 } else {
1540                         fprintf(stderr, "invalid location in dir item %u\n",
1541                                 location.type);
1542                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1543                                           key->objectid, key->offset, namebuf,
1544                                           len, filetype, key->type, error);
1545                 }
1546
1547                 len = sizeof(*di) + name_len + data_len;
1548                 di = (struct btrfs_dir_item *)((char *)di + len);
1549                 cur += len;
1550         }
1551         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1552                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1553
1554         return 0;
1555 }
1556
1557 static int process_inode_ref(struct extent_buffer *eb,
1558                              int slot, struct btrfs_key *key,
1559                              struct shared_node *active_node)
1560 {
1561         u32 total;
1562         u32 cur = 0;
1563         u32 len;
1564         u32 name_len;
1565         u64 index;
1566         int error;
1567         struct cache_tree *inode_cache;
1568         struct btrfs_inode_ref *ref;
1569         char namebuf[BTRFS_NAME_LEN];
1570
1571         inode_cache = &active_node->inode_cache;
1572
1573         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1574         total = btrfs_item_size_nr(eb, slot);
1575         while (cur < total) {
1576                 name_len = btrfs_inode_ref_name_len(eb, ref);
1577                 index = btrfs_inode_ref_index(eb, ref);
1578
1579                 /* inode_ref + namelen should not cross item boundary */
1580                 if (cur + sizeof(*ref) + name_len > total ||
1581                     name_len > BTRFS_NAME_LEN) {
1582                         if (total < cur + sizeof(*ref))
1583                                 break;
1584
1585                         /* Still try to read out the remaining part */
1586                         len = min_t(u32, total - cur - sizeof(*ref),
1587                                     BTRFS_NAME_LEN);
1588                         error = REF_ERR_NAME_TOO_LONG;
1589                 } else {
1590                         len = name_len;
1591                         error = 0;
1592                 }
1593
1594                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1595                 add_inode_backref(inode_cache, key->objectid, key->offset,
1596                                   index, namebuf, len, 0, key->type, error);
1597
1598                 len = sizeof(*ref) + name_len;
1599                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1600                 cur += len;
1601         }
1602         return 0;
1603 }
1604
1605 static int process_inode_extref(struct extent_buffer *eb,
1606                                 int slot, struct btrfs_key *key,
1607                                 struct shared_node *active_node)
1608 {
1609         u32 total;
1610         u32 cur = 0;
1611         u32 len;
1612         u32 name_len;
1613         u64 index;
1614         u64 parent;
1615         int error;
1616         struct cache_tree *inode_cache;
1617         struct btrfs_inode_extref *extref;
1618         char namebuf[BTRFS_NAME_LEN];
1619
1620         inode_cache = &active_node->inode_cache;
1621
1622         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1623         total = btrfs_item_size_nr(eb, slot);
1624         while (cur < total) {
1625                 name_len = btrfs_inode_extref_name_len(eb, extref);
1626                 index = btrfs_inode_extref_index(eb, extref);
1627                 parent = btrfs_inode_extref_parent(eb, extref);
1628                 if (name_len <= BTRFS_NAME_LEN) {
1629                         len = name_len;
1630                         error = 0;
1631                 } else {
1632                         len = BTRFS_NAME_LEN;
1633                         error = REF_ERR_NAME_TOO_LONG;
1634                 }
1635                 read_extent_buffer(eb, namebuf,
1636                                    (unsigned long)(extref + 1), len);
1637                 add_inode_backref(inode_cache, key->objectid, parent,
1638                                   index, namebuf, len, 0, key->type, error);
1639
1640                 len = sizeof(*extref) + name_len;
1641                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1642                 cur += len;
1643         }
1644         return 0;
1645
1646 }
1647
1648 static int count_csum_range(struct btrfs_root *root, u64 start,
1649                             u64 len, u64 *found)
1650 {
1651         struct btrfs_key key;
1652         struct btrfs_path path;
1653         struct extent_buffer *leaf;
1654         int ret;
1655         size_t size;
1656         *found = 0;
1657         u64 csum_end;
1658         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1659
1660         btrfs_init_path(&path);
1661
1662         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1663         key.offset = start;
1664         key.type = BTRFS_EXTENT_CSUM_KEY;
1665
1666         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1667                                 &key, &path, 0, 0);
1668         if (ret < 0)
1669                 goto out;
1670         if (ret > 0 && path.slots[0] > 0) {
1671                 leaf = path.nodes[0];
1672                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1673                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1674                     key.type == BTRFS_EXTENT_CSUM_KEY)
1675                         path.slots[0]--;
1676         }
1677
1678         while (len > 0) {
1679                 leaf = path.nodes[0];
1680                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1681                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1682                         if (ret > 0)
1683                                 break;
1684                         else if (ret < 0)
1685                                 goto out;
1686                         leaf = path.nodes[0];
1687                 }
1688
1689                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1690                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1691                     key.type != BTRFS_EXTENT_CSUM_KEY)
1692                         break;
1693
1694                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1695                 if (key.offset >= start + len)
1696                         break;
1697
1698                 if (key.offset > start)
1699                         start = key.offset;
1700
1701                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1702                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1703                 if (csum_end > start) {
1704                         size = min(csum_end - start, len);
1705                         len -= size;
1706                         start += size;
1707                         *found += size;
1708                 }
1709
1710                 path.slots[0]++;
1711         }
1712 out:
1713         btrfs_release_path(&path);
1714         if (ret < 0)
1715                 return ret;
1716         return 0;
1717 }
1718
1719 static int process_file_extent(struct btrfs_root *root,
1720                                 struct extent_buffer *eb,
1721                                 int slot, struct btrfs_key *key,
1722                                 struct shared_node *active_node)
1723 {
1724         struct inode_record *rec;
1725         struct btrfs_file_extent_item *fi;
1726         u64 num_bytes = 0;
1727         u64 disk_bytenr = 0;
1728         u64 extent_offset = 0;
1729         u64 mask = root->sectorsize - 1;
1730         int extent_type;
1731         int ret;
1732
1733         rec = active_node->current;
1734         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1735         rec->found_file_extent = 1;
1736
1737         if (rec->extent_start == (u64)-1) {
1738                 rec->extent_start = key->offset;
1739                 rec->extent_end = key->offset;
1740         }
1741
1742         if (rec->extent_end > key->offset)
1743                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1744         else if (rec->extent_end < key->offset) {
1745                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1746                                            key->offset - rec->extent_end);
1747                 if (ret < 0)
1748                         return ret;
1749         }
1750
1751         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1752         extent_type = btrfs_file_extent_type(eb, fi);
1753
1754         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1755                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1756                 if (num_bytes == 0)
1757                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1758                 rec->found_size += num_bytes;
1759                 num_bytes = (num_bytes + mask) & ~mask;
1760         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1761                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1762                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1763                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1764                 extent_offset = btrfs_file_extent_offset(eb, fi);
1765                 if (num_bytes == 0 || (num_bytes & mask))
1766                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1767                 if (num_bytes + extent_offset >
1768                     btrfs_file_extent_ram_bytes(eb, fi))
1769                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1770                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1771                     (btrfs_file_extent_compression(eb, fi) ||
1772                      btrfs_file_extent_encryption(eb, fi) ||
1773                      btrfs_file_extent_other_encoding(eb, fi)))
1774                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775                 if (disk_bytenr > 0)
1776                         rec->found_size += num_bytes;
1777         } else {
1778                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1779         }
1780         rec->extent_end = key->offset + num_bytes;
1781
1782         /*
1783          * The data reloc tree will copy full extents into its inode and then
1784          * copy the corresponding csums.  Because the extent it copied could be
1785          * a preallocated extent that hasn't been written to yet there may be no
1786          * csums to copy, ergo we won't have csums for our file extent.  This is
1787          * ok so just don't bother checking csums if the inode belongs to the
1788          * data reloc tree.
1789          */
1790         if (disk_bytenr > 0 &&
1791             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1792                 u64 found;
1793                 if (btrfs_file_extent_compression(eb, fi))
1794                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1795                 else
1796                         disk_bytenr += extent_offset;
1797
1798                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1799                 if (ret < 0)
1800                         return ret;
1801                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1802                         if (found > 0)
1803                                 rec->found_csum_item = 1;
1804                         if (found < num_bytes)
1805                                 rec->some_csum_missing = 1;
1806                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1807                         if (found > 0)
1808                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1809                 }
1810         }
1811         return 0;
1812 }
1813
1814 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1815                             struct walk_control *wc)
1816 {
1817         struct btrfs_key key;
1818         u32 nritems;
1819         int i;
1820         int ret = 0;
1821         struct cache_tree *inode_cache;
1822         struct shared_node *active_node;
1823
1824         if (wc->root_level == wc->active_node &&
1825             btrfs_root_refs(&root->root_item) == 0)
1826                 return 0;
1827
1828         active_node = wc->nodes[wc->active_node];
1829         inode_cache = &active_node->inode_cache;
1830         nritems = btrfs_header_nritems(eb);
1831         for (i = 0; i < nritems; i++) {
1832                 btrfs_item_key_to_cpu(eb, &key, i);
1833
1834                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1835                         continue;
1836                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1837                         continue;
1838
1839                 if (active_node->current == NULL ||
1840                     active_node->current->ino < key.objectid) {
1841                         if (active_node->current) {
1842                                 active_node->current->checked = 1;
1843                                 maybe_free_inode_rec(inode_cache,
1844                                                      active_node->current);
1845                         }
1846                         active_node->current = get_inode_rec(inode_cache,
1847                                                              key.objectid, 1);
1848                         BUG_ON(IS_ERR(active_node->current));
1849                 }
1850                 switch (key.type) {
1851                 case BTRFS_DIR_ITEM_KEY:
1852                 case BTRFS_DIR_INDEX_KEY:
1853                         ret = process_dir_item(eb, i, &key, active_node);
1854                         break;
1855                 case BTRFS_INODE_REF_KEY:
1856                         ret = process_inode_ref(eb, i, &key, active_node);
1857                         break;
1858                 case BTRFS_INODE_EXTREF_KEY:
1859                         ret = process_inode_extref(eb, i, &key, active_node);
1860                         break;
1861                 case BTRFS_INODE_ITEM_KEY:
1862                         ret = process_inode_item(eb, i, &key, active_node);
1863                         break;
1864                 case BTRFS_EXTENT_DATA_KEY:
1865                         ret = process_file_extent(root, eb, i, &key,
1866                                                   active_node);
1867                         break;
1868                 default:
1869                         break;
1870                 };
1871         }
1872         return ret;
1873 }
1874
1875 struct node_refs {
1876         u64 bytenr[BTRFS_MAX_LEVEL];
1877         u64 refs[BTRFS_MAX_LEVEL];
1878         int need_check[BTRFS_MAX_LEVEL];
1879 };
1880
1881 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1882                              struct node_refs *nrefs, u64 level);
1883 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1884                             unsigned int ext_ref);
1885
1886 /*
1887  * Returns >0  Found error, not fatal, should continue
1888  * Returns <0  Fatal error, must exit the whole check
1889  * Returns 0   No errors found
1890  */
1891 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1892                                struct node_refs *nrefs, int *level, int ext_ref)
1893 {
1894         struct extent_buffer *cur = path->nodes[0];
1895         struct btrfs_key key;
1896         u64 cur_bytenr;
1897         u32 nritems;
1898         u64 first_ino = 0;
1899         int root_level = btrfs_header_level(root->node);
1900         int i;
1901         int ret = 0; /* Final return value */
1902         int err = 0; /* Positive error bitmap */
1903
1904         cur_bytenr = cur->start;
1905
1906         /* skip to first inode item or the first inode number change */
1907         nritems = btrfs_header_nritems(cur);
1908         for (i = 0; i < nritems; i++) {
1909                 btrfs_item_key_to_cpu(cur, &key, i);
1910                 if (i == 0)
1911                         first_ino = key.objectid;
1912                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1913                     (first_ino && first_ino != key.objectid))
1914                         break;
1915         }
1916         if (i == nritems) {
1917                 path->slots[0] = nritems;
1918                 return 0;
1919         }
1920         path->slots[0] = i;
1921
1922 again:
1923         err |= check_inode_item(root, path, ext_ref);
1924
1925         if (err & LAST_ITEM)
1926                 goto out;
1927
1928         /* still have inode items in thie leaf */
1929         if (cur->start == cur_bytenr)
1930                 goto again;
1931
1932         /*
1933          * we have switched to another leaf, above nodes may
1934          * have changed, here walk down the path, if a node
1935          * or leaf is shared, check whether we can skip this
1936          * node or leaf.
1937          */
1938         for (i = root_level; i >= 0; i--) {
1939                 if (path->nodes[i]->start == nrefs->bytenr[i])
1940                         continue;
1941
1942                 ret = update_nodes_refs(root,
1943                                 path->nodes[i]->start,
1944                                 nrefs, i);
1945                 if (ret)
1946                         goto out;
1947
1948                 if (!nrefs->need_check[i]) {
1949                         *level += 1;
1950                         break;
1951                 }
1952         }
1953
1954         for (i = 0; i < *level; i++) {
1955                 free_extent_buffer(path->nodes[i]);
1956                 path->nodes[i] = NULL;
1957         }
1958 out:
1959         err &= ~LAST_ITEM;
1960         if (err && !ret)
1961                 ret = err;
1962         return ret;
1963 }
1964
1965 static void reada_walk_down(struct btrfs_root *root,
1966                             struct extent_buffer *node, int slot)
1967 {
1968         u64 bytenr;
1969         u64 ptr_gen;
1970         u32 nritems;
1971         u32 blocksize;
1972         int i;
1973         int level;
1974
1975         level = btrfs_header_level(node);
1976         if (level != 1)
1977                 return;
1978
1979         nritems = btrfs_header_nritems(node);
1980         blocksize = root->nodesize;
1981         for (i = slot; i < nritems; i++) {
1982                 bytenr = btrfs_node_blockptr(node, i);
1983                 ptr_gen = btrfs_node_ptr_generation(node, i);
1984                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1985         }
1986 }
1987
1988 /*
1989  * Check the child node/leaf by the following condition:
1990  * 1. the first item key of the node/leaf should be the same with the one
1991  *    in parent.
1992  * 2. block in parent node should match the child node/leaf.
1993  * 3. generation of parent node and child's header should be consistent.
1994  *
1995  * Or the child node/leaf pointed by the key in parent is not valid.
1996  *
1997  * We hope to check leaf owner too, but since subvol may share leaves,
1998  * which makes leaf owner check not so strong, key check should be
1999  * sufficient enough for that case.
2000  */
2001 static int check_child_node(struct extent_buffer *parent, int slot,
2002                             struct extent_buffer *child)
2003 {
2004         struct btrfs_key parent_key;
2005         struct btrfs_key child_key;
2006         int ret = 0;
2007
2008         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2009         if (btrfs_header_level(child) == 0)
2010                 btrfs_item_key_to_cpu(child, &child_key, 0);
2011         else
2012                 btrfs_node_key_to_cpu(child, &child_key, 0);
2013
2014         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2015                 ret = -EINVAL;
2016                 fprintf(stderr,
2017                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2018                         parent_key.objectid, parent_key.type, parent_key.offset,
2019                         child_key.objectid, child_key.type, child_key.offset);
2020         }
2021         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2022                 ret = -EINVAL;
2023                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2024                         btrfs_node_blockptr(parent, slot),
2025                         btrfs_header_bytenr(child));
2026         }
2027         if (btrfs_node_ptr_generation(parent, slot) !=
2028             btrfs_header_generation(child)) {
2029                 ret = -EINVAL;
2030                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2031                         btrfs_header_generation(child),
2032                         btrfs_node_ptr_generation(parent, slot));
2033         }
2034         return ret;
2035 }
2036
2037 /*
2038  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2039  * in every fs or file tree check. Here we find its all root ids, and only check
2040  * it in the fs or file tree which has the smallest root id.
2041  */
2042 static int need_check(struct btrfs_root *root, struct ulist *roots)
2043 {
2044         struct rb_node *node;
2045         struct ulist_node *u;
2046
2047         if (roots->nnodes == 1)
2048                 return 1;
2049
2050         node = rb_first(&roots->root);
2051         u = rb_entry(node, struct ulist_node, rb_node);
2052         /*
2053          * current root id is not smallest, we skip it and let it be checked
2054          * in the fs or file tree who hash the smallest root id.
2055          */
2056         if (root->objectid != u->val)
2057                 return 0;
2058
2059         return 1;
2060 }
2061
2062 /*
2063  * for a tree node or leaf, we record its reference count, so later if we still
2064  * process this node or leaf, don't need to compute its reference count again.
2065  */
2066 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2067                              struct node_refs *nrefs, u64 level)
2068 {
2069         int check, ret;
2070         u64 refs;
2071         struct ulist *roots;
2072
2073         if (nrefs->bytenr[level] != bytenr) {
2074                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2075                                        level, 1, &refs, NULL);
2076                 if (ret < 0)
2077                         return ret;
2078
2079                 nrefs->bytenr[level] = bytenr;
2080                 nrefs->refs[level] = refs;
2081                 if (refs > 1) {
2082                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2083                                                    0, &roots);
2084                         if (ret)
2085                                 return -EIO;
2086
2087                         check = need_check(root, roots);
2088                         ulist_free(roots);
2089                         nrefs->need_check[level] = check;
2090                 } else {
2091                         nrefs->need_check[level] = 1;
2092                 }
2093         }
2094
2095         return 0;
2096 }
2097
2098 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2099                           struct walk_control *wc, int *level,
2100                           struct node_refs *nrefs)
2101 {
2102         enum btrfs_tree_block_status status;
2103         u64 bytenr;
2104         u64 ptr_gen;
2105         struct extent_buffer *next;
2106         struct extent_buffer *cur;
2107         u32 blocksize;
2108         int ret, err = 0;
2109         u64 refs;
2110
2111         WARN_ON(*level < 0);
2112         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2113
2114         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2115                 refs = nrefs->refs[*level];
2116                 ret = 0;
2117         } else {
2118                 ret = btrfs_lookup_extent_info(NULL, root,
2119                                        path->nodes[*level]->start,
2120                                        *level, 1, &refs, NULL);
2121                 if (ret < 0) {
2122                         err = ret;
2123                         goto out;
2124                 }
2125                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2126                 nrefs->refs[*level] = refs;
2127         }
2128
2129         if (refs > 1) {
2130                 ret = enter_shared_node(root, path->nodes[*level]->start,
2131                                         refs, wc, *level);
2132                 if (ret > 0) {
2133                         err = ret;
2134                         goto out;
2135                 }
2136         }
2137
2138         while (*level >= 0) {
2139                 WARN_ON(*level < 0);
2140                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2141                 cur = path->nodes[*level];
2142
2143                 if (btrfs_header_level(cur) != *level)
2144                         WARN_ON(1);
2145
2146                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2147                         break;
2148                 if (*level == 0) {
2149                         ret = process_one_leaf(root, cur, wc);
2150                         if (ret < 0)
2151                                 err = ret;
2152                         break;
2153                 }
2154                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2155                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2156                 blocksize = root->nodesize;
2157
2158                 if (bytenr == nrefs->bytenr[*level - 1]) {
2159                         refs = nrefs->refs[*level - 1];
2160                 } else {
2161                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2162                                         *level - 1, 1, &refs, NULL);
2163                         if (ret < 0) {
2164                                 refs = 0;
2165                         } else {
2166                                 nrefs->bytenr[*level - 1] = bytenr;
2167                                 nrefs->refs[*level - 1] = refs;
2168                         }
2169                 }
2170
2171                 if (refs > 1) {
2172                         ret = enter_shared_node(root, bytenr, refs,
2173                                                 wc, *level - 1);
2174                         if (ret > 0) {
2175                                 path->slots[*level]++;
2176                                 continue;
2177                         }
2178                 }
2179
2180                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2181                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2182                         free_extent_buffer(next);
2183                         reada_walk_down(root, cur, path->slots[*level]);
2184                         next = read_tree_block(root, bytenr, blocksize,
2185                                                ptr_gen);
2186                         if (!extent_buffer_uptodate(next)) {
2187                                 struct btrfs_key node_key;
2188
2189                                 btrfs_node_key_to_cpu(path->nodes[*level],
2190                                                       &node_key,
2191                                                       path->slots[*level]);
2192                                 btrfs_add_corrupt_extent_record(root->fs_info,
2193                                                 &node_key,
2194                                                 path->nodes[*level]->start,
2195                                                 root->nodesize, *level);
2196                                 err = -EIO;
2197                                 goto out;
2198                         }
2199                 }
2200
2201                 ret = check_child_node(cur, path->slots[*level], next);
2202                 if (ret) {
2203                         free_extent_buffer(next);
2204                         err = ret;
2205                         goto out;
2206                 }
2207
2208                 if (btrfs_is_leaf(next))
2209                         status = btrfs_check_leaf(root, NULL, next);
2210                 else
2211                         status = btrfs_check_node(root, NULL, next);
2212                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2213                         free_extent_buffer(next);
2214                         err = -EIO;
2215                         goto out;
2216                 }
2217
2218                 *level = *level - 1;
2219                 free_extent_buffer(path->nodes[*level]);
2220                 path->nodes[*level] = next;
2221                 path->slots[*level] = 0;
2222         }
2223 out:
2224         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2225         return err;
2226 }
2227
2228 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2229                             unsigned int ext_ref);
2230
2231 /*
2232  * Returns >0  Found error, should continue
2233  * Returns <0  Fatal error, must exit the whole check
2234  * Returns 0   No errors found
2235  */
2236 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2237                              int *level, struct node_refs *nrefs, int ext_ref)
2238 {
2239         enum btrfs_tree_block_status status;
2240         u64 bytenr;
2241         u64 ptr_gen;
2242         struct extent_buffer *next;
2243         struct extent_buffer *cur;
2244         u32 blocksize;
2245         int ret;
2246
2247         WARN_ON(*level < 0);
2248         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2249
2250         ret = update_nodes_refs(root, path->nodes[*level]->start,
2251                                 nrefs, *level);
2252         if (ret < 0)
2253                 return ret;
2254
2255         while (*level >= 0) {
2256                 WARN_ON(*level < 0);
2257                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2258                 cur = path->nodes[*level];
2259
2260                 if (btrfs_header_level(cur) != *level)
2261                         WARN_ON(1);
2262
2263                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2264                         break;
2265                 /* Don't forgot to check leaf/node validation */
2266                 if (*level == 0) {
2267                         ret = btrfs_check_leaf(root, NULL, cur);
2268                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2269                                 ret = -EIO;
2270                                 break;
2271                         }
2272                         ret = process_one_leaf_v2(root, path, nrefs,
2273                                                   level, ext_ref);
2274                         break;
2275                 } else {
2276                         ret = btrfs_check_node(root, NULL, cur);
2277                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2278                                 ret = -EIO;
2279                                 break;
2280                         }
2281                 }
2282                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2283                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2284                 blocksize = root->nodesize;
2285
2286                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2287                 if (ret)
2288                         break;
2289                 if (!nrefs->need_check[*level - 1]) {
2290                         path->slots[*level]++;
2291                         continue;
2292                 }
2293
2294                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2295                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2296                         free_extent_buffer(next);
2297                         reada_walk_down(root, cur, path->slots[*level]);
2298                         next = read_tree_block(root, bytenr, blocksize,
2299                                                ptr_gen);
2300                         if (!extent_buffer_uptodate(next)) {
2301                                 struct btrfs_key node_key;
2302
2303                                 btrfs_node_key_to_cpu(path->nodes[*level],
2304                                                       &node_key,
2305                                                       path->slots[*level]);
2306                                 btrfs_add_corrupt_extent_record(root->fs_info,
2307                                                 &node_key,
2308                                                 path->nodes[*level]->start,
2309                                                 root->nodesize, *level);
2310                                 ret = -EIO;
2311                                 break;
2312                         }
2313                 }
2314
2315                 ret = check_child_node(cur, path->slots[*level], next);
2316                 if (ret < 0) 
2317                         break;
2318
2319                 if (btrfs_is_leaf(next))
2320                         status = btrfs_check_leaf(root, NULL, next);
2321                 else
2322                         status = btrfs_check_node(root, NULL, next);
2323                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2324                         free_extent_buffer(next);
2325                         ret = -EIO;
2326                         break;
2327                 }
2328
2329                 *level = *level - 1;
2330                 free_extent_buffer(path->nodes[*level]);
2331                 path->nodes[*level] = next;
2332                 path->slots[*level] = 0;
2333         }
2334         return ret;
2335 }
2336
2337 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2338                         struct walk_control *wc, int *level)
2339 {
2340         int i;
2341         struct extent_buffer *leaf;
2342
2343         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2344                 leaf = path->nodes[i];
2345                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2346                         path->slots[i]++;
2347                         *level = i;
2348                         return 0;
2349                 } else {
2350                         free_extent_buffer(path->nodes[*level]);
2351                         path->nodes[*level] = NULL;
2352                         BUG_ON(*level > wc->active_node);
2353                         if (*level == wc->active_node)
2354                                 leave_shared_node(root, wc, *level);
2355                         *level = i + 1;
2356                 }
2357         }
2358         return 1;
2359 }
2360
2361 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2362                            int *level)
2363 {
2364         int i;
2365         struct extent_buffer *leaf;
2366
2367         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2368                 leaf = path->nodes[i];
2369                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2370                         path->slots[i]++;
2371                         *level = i;
2372                         return 0;
2373                 } else {
2374                         free_extent_buffer(path->nodes[*level]);
2375                         path->nodes[*level] = NULL;
2376                         *level = i + 1;
2377                 }
2378         }
2379         return 1;
2380 }
2381
2382 static int check_root_dir(struct inode_record *rec)
2383 {
2384         struct inode_backref *backref;
2385         int ret = -1;
2386
2387         if (!rec->found_inode_item || rec->errors)
2388                 goto out;
2389         if (rec->nlink != 1 || rec->found_link != 0)
2390                 goto out;
2391         if (list_empty(&rec->backrefs))
2392                 goto out;
2393         backref = to_inode_backref(rec->backrefs.next);
2394         if (!backref->found_inode_ref)
2395                 goto out;
2396         if (backref->index != 0 || backref->namelen != 2 ||
2397             memcmp(backref->name, "..", 2))
2398                 goto out;
2399         if (backref->found_dir_index || backref->found_dir_item)
2400                 goto out;
2401         ret = 0;
2402 out:
2403         return ret;
2404 }
2405
2406 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2407                               struct btrfs_root *root, struct btrfs_path *path,
2408                               struct inode_record *rec)
2409 {
2410         struct btrfs_inode_item *ei;
2411         struct btrfs_key key;
2412         int ret;
2413
2414         key.objectid = rec->ino;
2415         key.type = BTRFS_INODE_ITEM_KEY;
2416         key.offset = (u64)-1;
2417
2418         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2419         if (ret < 0)
2420                 goto out;
2421         if (ret) {
2422                 if (!path->slots[0]) {
2423                         ret = -ENOENT;
2424                         goto out;
2425                 }
2426                 path->slots[0]--;
2427                 ret = 0;
2428         }
2429         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2430         if (key.objectid != rec->ino) {
2431                 ret = -ENOENT;
2432                 goto out;
2433         }
2434
2435         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2436                             struct btrfs_inode_item);
2437         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2438         btrfs_mark_buffer_dirty(path->nodes[0]);
2439         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2440         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2441                root->root_key.objectid);
2442 out:
2443         btrfs_release_path(path);
2444         return ret;
2445 }
2446
2447 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2448                                     struct btrfs_root *root,
2449                                     struct btrfs_path *path,
2450                                     struct inode_record *rec)
2451 {
2452         int ret;
2453
2454         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2455         btrfs_release_path(path);
2456         if (!ret)
2457                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2458         return ret;
2459 }
2460
2461 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2462                                struct btrfs_root *root,
2463                                struct btrfs_path *path,
2464                                struct inode_record *rec)
2465 {
2466         struct btrfs_inode_item *ei;
2467         struct btrfs_key key;
2468         int ret = 0;
2469
2470         key.objectid = rec->ino;
2471         key.type = BTRFS_INODE_ITEM_KEY;
2472         key.offset = 0;
2473
2474         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2475         if (ret) {
2476                 if (ret > 0)
2477                         ret = -ENOENT;
2478                 goto out;
2479         }
2480
2481         /* Since ret == 0, no need to check anything */
2482         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2483                             struct btrfs_inode_item);
2484         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2485         btrfs_mark_buffer_dirty(path->nodes[0]);
2486         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2487         printf("reset nbytes for ino %llu root %llu\n",
2488                rec->ino, root->root_key.objectid);
2489 out:
2490         btrfs_release_path(path);
2491         return ret;
2492 }
2493
2494 static int add_missing_dir_index(struct btrfs_root *root,
2495                                  struct cache_tree *inode_cache,
2496                                  struct inode_record *rec,
2497                                  struct inode_backref *backref)
2498 {
2499         struct btrfs_path path;
2500         struct btrfs_trans_handle *trans;
2501         struct btrfs_dir_item *dir_item;
2502         struct extent_buffer *leaf;
2503         struct btrfs_key key;
2504         struct btrfs_disk_key disk_key;
2505         struct inode_record *dir_rec;
2506         unsigned long name_ptr;
2507         u32 data_size = sizeof(*dir_item) + backref->namelen;
2508         int ret;
2509
2510         trans = btrfs_start_transaction(root, 1);
2511         if (IS_ERR(trans))
2512                 return PTR_ERR(trans);
2513
2514         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2515                 (unsigned long long)rec->ino);
2516
2517         btrfs_init_path(&path);
2518         key.objectid = backref->dir;
2519         key.type = BTRFS_DIR_INDEX_KEY;
2520         key.offset = backref->index;
2521         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2522         BUG_ON(ret);
2523
2524         leaf = path.nodes[0];
2525         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2526
2527         disk_key.objectid = cpu_to_le64(rec->ino);
2528         disk_key.type = BTRFS_INODE_ITEM_KEY;
2529         disk_key.offset = 0;
2530
2531         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2532         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2533         btrfs_set_dir_data_len(leaf, dir_item, 0);
2534         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2535         name_ptr = (unsigned long)(dir_item + 1);
2536         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2537         btrfs_mark_buffer_dirty(leaf);
2538         btrfs_release_path(&path);
2539         btrfs_commit_transaction(trans, root);
2540
2541         backref->found_dir_index = 1;
2542         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2543         BUG_ON(IS_ERR(dir_rec));
2544         if (!dir_rec)
2545                 return 0;
2546         dir_rec->found_size += backref->namelen;
2547         if (dir_rec->found_size == dir_rec->isize &&
2548             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2549                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2550         if (dir_rec->found_size != dir_rec->isize)
2551                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2552
2553         return 0;
2554 }
2555
2556 static int delete_dir_index(struct btrfs_root *root,
2557                             struct inode_backref *backref)
2558 {
2559         struct btrfs_trans_handle *trans;
2560         struct btrfs_dir_item *di;
2561         struct btrfs_path path;
2562         int ret = 0;
2563
2564         trans = btrfs_start_transaction(root, 1);
2565         if (IS_ERR(trans))
2566                 return PTR_ERR(trans);
2567
2568         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2569                 (unsigned long long)backref->dir,
2570                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2571                 (unsigned long long)root->objectid);
2572
2573         btrfs_init_path(&path);
2574         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2575                                     backref->name, backref->namelen,
2576                                     backref->index, -1);
2577         if (IS_ERR(di)) {
2578                 ret = PTR_ERR(di);
2579                 btrfs_release_path(&path);
2580                 btrfs_commit_transaction(trans, root);
2581                 if (ret == -ENOENT)
2582                         return 0;
2583                 return ret;
2584         }
2585
2586         if (!di)
2587                 ret = btrfs_del_item(trans, root, &path);
2588         else
2589                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2590         BUG_ON(ret);
2591         btrfs_release_path(&path);
2592         btrfs_commit_transaction(trans, root);
2593         return ret;
2594 }
2595
2596 static int create_inode_item(struct btrfs_root *root,
2597                              struct inode_record *rec,
2598                              int root_dir)
2599 {
2600         struct btrfs_trans_handle *trans;
2601         struct btrfs_inode_item inode_item;
2602         time_t now = time(NULL);
2603         int ret;
2604
2605         trans = btrfs_start_transaction(root, 1);
2606         if (IS_ERR(trans)) {
2607                 ret = PTR_ERR(trans);
2608                 return ret;
2609         }
2610
2611         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2612                 "be incomplete, please check permissions and content after "
2613                 "the fsck completes.\n", (unsigned long long)root->objectid,
2614                 (unsigned long long)rec->ino);
2615
2616         memset(&inode_item, 0, sizeof(inode_item));
2617         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2618         if (root_dir)
2619                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2620         else
2621                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2622         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2623         if (rec->found_dir_item) {
2624                 if (rec->found_file_extent)
2625                         fprintf(stderr, "root %llu inode %llu has both a dir "
2626                                 "item and extents, unsure if it is a dir or a "
2627                                 "regular file so setting it as a directory\n",
2628                                 (unsigned long long)root->objectid,
2629                                 (unsigned long long)rec->ino);
2630                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2631                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2632         } else if (!rec->found_dir_item) {
2633                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2634                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2635         }
2636         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2637         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2638         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2639         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2640         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2641         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2642         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2643         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2644
2645         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2646         BUG_ON(ret);
2647         btrfs_commit_transaction(trans, root);
2648         return 0;
2649 }
2650
2651 static int repair_inode_backrefs(struct btrfs_root *root,
2652                                  struct inode_record *rec,
2653                                  struct cache_tree *inode_cache,
2654                                  int delete)
2655 {
2656         struct inode_backref *tmp, *backref;
2657         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2658         int ret = 0;
2659         int repaired = 0;
2660
2661         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2662                 if (!delete && rec->ino == root_dirid) {
2663                         if (!rec->found_inode_item) {
2664                                 ret = create_inode_item(root, rec, 1);
2665                                 if (ret)
2666                                         break;
2667                                 repaired++;
2668                         }
2669                 }
2670
2671                 /* Index 0 for root dir's are special, don't mess with it */
2672                 if (rec->ino == root_dirid && backref->index == 0)
2673                         continue;
2674
2675                 if (delete &&
2676                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2677                      (backref->found_dir_index && backref->found_inode_ref &&
2678                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2679                         ret = delete_dir_index(root, backref);
2680                         if (ret)
2681                                 break;
2682                         repaired++;
2683                         list_del(&backref->list);
2684                         free(backref);
2685                         continue;
2686                 }
2687
2688                 if (!delete && !backref->found_dir_index &&
2689                     backref->found_dir_item && backref->found_inode_ref) {
2690                         ret = add_missing_dir_index(root, inode_cache, rec,
2691                                                     backref);
2692                         if (ret)
2693                                 break;
2694                         repaired++;
2695                         if (backref->found_dir_item &&
2696                             backref->found_dir_index) {
2697                                 if (!backref->errors &&
2698                                     backref->found_inode_ref) {
2699                                         list_del(&backref->list);
2700                                         free(backref);
2701                                         continue;
2702                                 }
2703                         }
2704                 }
2705
2706                 if (!delete && (!backref->found_dir_index &&
2707                                 !backref->found_dir_item &&
2708                                 backref->found_inode_ref)) {
2709                         struct btrfs_trans_handle *trans;
2710                         struct btrfs_key location;
2711
2712                         ret = check_dir_conflict(root, backref->name,
2713                                                  backref->namelen,
2714                                                  backref->dir,
2715                                                  backref->index);
2716                         if (ret) {
2717                                 /*
2718                                  * let nlink fixing routine to handle it,
2719                                  * which can do it better.
2720                                  */
2721                                 ret = 0;
2722                                 break;
2723                         }
2724                         location.objectid = rec->ino;
2725                         location.type = BTRFS_INODE_ITEM_KEY;
2726                         location.offset = 0;
2727
2728                         trans = btrfs_start_transaction(root, 1);
2729                         if (IS_ERR(trans)) {
2730                                 ret = PTR_ERR(trans);
2731                                 break;
2732                         }
2733                         fprintf(stderr, "adding missing dir index/item pair "
2734                                 "for inode %llu\n",
2735                                 (unsigned long long)rec->ino);
2736                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2737                                                     backref->namelen,
2738                                                     backref->dir, &location,
2739                                                     imode_to_type(rec->imode),
2740                                                     backref->index);
2741                         BUG_ON(ret);
2742                         btrfs_commit_transaction(trans, root);
2743                         repaired++;
2744                 }
2745
2746                 if (!delete && (backref->found_inode_ref &&
2747                                 backref->found_dir_index &&
2748                                 backref->found_dir_item &&
2749                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2750                                 !rec->found_inode_item)) {
2751                         ret = create_inode_item(root, rec, 0);
2752                         if (ret)
2753                                 break;
2754                         repaired++;
2755                 }
2756
2757         }
2758         return ret ? ret : repaired;
2759 }
2760
2761 /*
2762  * To determine the file type for nlink/inode_item repair
2763  *
2764  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2765  * Return -ENOENT if file type is not found.
2766  */
2767 static int find_file_type(struct inode_record *rec, u8 *type)
2768 {
2769         struct inode_backref *backref;
2770
2771         /* For inode item recovered case */
2772         if (rec->found_inode_item) {
2773                 *type = imode_to_type(rec->imode);
2774                 return 0;
2775         }
2776
2777         list_for_each_entry(backref, &rec->backrefs, list) {
2778                 if (backref->found_dir_index || backref->found_dir_item) {
2779                         *type = backref->filetype;
2780                         return 0;
2781                 }
2782         }
2783         return -ENOENT;
2784 }
2785
2786 /*
2787  * To determine the file name for nlink repair
2788  *
2789  * Return 0 if file name is found, set name and namelen.
2790  * Return -ENOENT if file name is not found.
2791  */
2792 static int find_file_name(struct inode_record *rec,
2793                           char *name, int *namelen)
2794 {
2795         struct inode_backref *backref;
2796
2797         list_for_each_entry(backref, &rec->backrefs, list) {
2798                 if (backref->found_dir_index || backref->found_dir_item ||
2799                     backref->found_inode_ref) {
2800                         memcpy(name, backref->name, backref->namelen);
2801                         *namelen = backref->namelen;
2802                         return 0;
2803                 }
2804         }
2805         return -ENOENT;
2806 }
2807
2808 /* Reset the nlink of the inode to the correct one */
2809 static int reset_nlink(struct btrfs_trans_handle *trans,
2810                        struct btrfs_root *root,
2811                        struct btrfs_path *path,
2812                        struct inode_record *rec)
2813 {
2814         struct inode_backref *backref;
2815         struct inode_backref *tmp;
2816         struct btrfs_key key;
2817         struct btrfs_inode_item *inode_item;
2818         int ret = 0;
2819
2820         /* We don't believe this either, reset it and iterate backref */
2821         rec->found_link = 0;
2822
2823         /* Remove all backref including the valid ones */
2824         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2825                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2826                                    backref->index, backref->name,
2827                                    backref->namelen, 0);
2828                 if (ret < 0)
2829                         goto out;
2830
2831                 /* remove invalid backref, so it won't be added back */
2832                 if (!(backref->found_dir_index &&
2833                       backref->found_dir_item &&
2834                       backref->found_inode_ref)) {
2835                         list_del(&backref->list);
2836                         free(backref);
2837                 } else {
2838                         rec->found_link++;
2839                 }
2840         }
2841
2842         /* Set nlink to 0 */
2843         key.objectid = rec->ino;
2844         key.type = BTRFS_INODE_ITEM_KEY;
2845         key.offset = 0;
2846         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2847         if (ret < 0)
2848                 goto out;
2849         if (ret > 0) {
2850                 ret = -ENOENT;
2851                 goto out;
2852         }
2853         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2854                                     struct btrfs_inode_item);
2855         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2856         btrfs_mark_buffer_dirty(path->nodes[0]);
2857         btrfs_release_path(path);
2858
2859         /*
2860          * Add back valid inode_ref/dir_item/dir_index,
2861          * add_link() will handle the nlink inc, so new nlink must be correct
2862          */
2863         list_for_each_entry(backref, &rec->backrefs, list) {
2864                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2865                                      backref->name, backref->namelen,
2866                                      backref->filetype, &backref->index, 1);
2867                 if (ret < 0)
2868                         goto out;
2869         }
2870 out:
2871         btrfs_release_path(path);
2872         return ret;
2873 }
2874
2875 static int get_highest_inode(struct btrfs_trans_handle *trans,
2876                                 struct btrfs_root *root,
2877                                 struct btrfs_path *path,
2878                                 u64 *highest_ino)
2879 {
2880         struct btrfs_key key, found_key;
2881         int ret;
2882
2883         btrfs_init_path(path);
2884         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2885         key.offset = -1;
2886         key.type = BTRFS_INODE_ITEM_KEY;
2887         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2888         if (ret == 1) {
2889                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2890                                 path->slots[0] - 1);
2891                 *highest_ino = found_key.objectid;
2892                 ret = 0;
2893         }
2894         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2895                 ret = -EOVERFLOW;
2896         btrfs_release_path(path);
2897         return ret;
2898 }
2899
2900 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2901                                struct btrfs_root *root,
2902                                struct btrfs_path *path,
2903                                struct inode_record *rec)
2904 {
2905         char *dir_name = "lost+found";
2906         char namebuf[BTRFS_NAME_LEN] = {0};
2907         u64 lost_found_ino;
2908         u32 mode = 0700;
2909         u8 type = 0;
2910         int namelen = 0;
2911         int name_recovered = 0;
2912         int type_recovered = 0;
2913         int ret = 0;
2914
2915         /*
2916          * Get file name and type first before these invalid inode ref
2917          * are deleted by remove_all_invalid_backref()
2918          */
2919         name_recovered = !find_file_name(rec, namebuf, &namelen);
2920         type_recovered = !find_file_type(rec, &type);
2921
2922         if (!name_recovered) {
2923                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2924                        rec->ino, rec->ino);
2925                 namelen = count_digits(rec->ino);
2926                 sprintf(namebuf, "%llu", rec->ino);
2927                 name_recovered = 1;
2928         }
2929         if (!type_recovered) {
2930                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2931                        rec->ino);
2932                 type = BTRFS_FT_REG_FILE;
2933                 type_recovered = 1;
2934         }
2935
2936         ret = reset_nlink(trans, root, path, rec);
2937         if (ret < 0) {
2938                 fprintf(stderr,
2939                         "Failed to reset nlink for inode %llu: %s\n",
2940                         rec->ino, strerror(-ret));
2941                 goto out;
2942         }
2943
2944         if (rec->found_link == 0) {
2945                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2946                 if (ret < 0)
2947                         goto out;
2948                 lost_found_ino++;
2949                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2950                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2951                                   mode);
2952                 if (ret < 0) {
2953                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2954                                 dir_name, strerror(-ret));
2955                         goto out;
2956                 }
2957                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2958                                      namebuf, namelen, type, NULL, 1);
2959                 /*
2960                  * Add ".INO" suffix several times to handle case where
2961                  * "FILENAME.INO" is already taken by another file.
2962                  */
2963                 while (ret == -EEXIST) {
2964                         /*
2965                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2966                          */
2967                         if (namelen + count_digits(rec->ino) + 1 >
2968                             BTRFS_NAME_LEN) {
2969                                 ret = -EFBIG;
2970                                 goto out;
2971                         }
2972                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2973                                  ".%llu", rec->ino);
2974                         namelen += count_digits(rec->ino) + 1;
2975                         ret = btrfs_add_link(trans, root, rec->ino,
2976                                              lost_found_ino, namebuf,
2977                                              namelen, type, NULL, 1);
2978                 }
2979                 if (ret < 0) {
2980                         fprintf(stderr,
2981                                 "Failed to link the inode %llu to %s dir: %s\n",
2982                                 rec->ino, dir_name, strerror(-ret));
2983                         goto out;
2984                 }
2985                 /*
2986                  * Just increase the found_link, don't actually add the
2987                  * backref. This will make things easier and this inode
2988                  * record will be freed after the repair is done.
2989                  * So fsck will not report problem about this inode.
2990                  */
2991                 rec->found_link++;
2992                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2993                        namelen, namebuf, dir_name);
2994         }
2995         printf("Fixed the nlink of inode %llu\n", rec->ino);
2996 out:
2997         /*
2998          * Clear the flag anyway, or we will loop forever for the same inode
2999          * as it will not be removed from the bad inode list and the dead loop
3000          * happens.
3001          */
3002         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3003         btrfs_release_path(path);
3004         return ret;
3005 }
3006
3007 /*
3008  * Check if there is any normal(reg or prealloc) file extent for given
3009  * ino.
3010  * This is used to determine the file type when neither its dir_index/item or
3011  * inode_item exists.
3012  *
3013  * This will *NOT* report error, if any error happens, just consider it does
3014  * not have any normal file extent.
3015  */
3016 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3017 {
3018         struct btrfs_path path;
3019         struct btrfs_key key;
3020         struct btrfs_key found_key;
3021         struct btrfs_file_extent_item *fi;
3022         u8 type;
3023         int ret = 0;
3024
3025         btrfs_init_path(&path);
3026         key.objectid = ino;
3027         key.type = BTRFS_EXTENT_DATA_KEY;
3028         key.offset = 0;
3029
3030         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3031         if (ret < 0) {
3032                 ret = 0;
3033                 goto out;
3034         }
3035         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3036                 ret = btrfs_next_leaf(root, &path);
3037                 if (ret) {
3038                         ret = 0;
3039                         goto out;
3040                 }
3041         }
3042         while (1) {
3043                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3044                                       path.slots[0]);
3045                 if (found_key.objectid != ino ||
3046                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3047                         break;
3048                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3049                                     struct btrfs_file_extent_item);
3050                 type = btrfs_file_extent_type(path.nodes[0], fi);
3051                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3052                         ret = 1;
3053                         goto out;
3054                 }
3055         }
3056 out:
3057         btrfs_release_path(&path);
3058         return ret;
3059 }
3060
3061 static u32 btrfs_type_to_imode(u8 type)
3062 {
3063         static u32 imode_by_btrfs_type[] = {
3064                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3065                 [BTRFS_FT_DIR]          = S_IFDIR,
3066                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3067                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3068                 [BTRFS_FT_FIFO]         = S_IFIFO,
3069                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3070                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3071         };
3072
3073         return imode_by_btrfs_type[(type)];
3074 }
3075
3076 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3077                                 struct btrfs_root *root,
3078                                 struct btrfs_path *path,
3079                                 struct inode_record *rec)
3080 {
3081         u8 filetype;
3082         u32 mode = 0700;
3083         int type_recovered = 0;
3084         int ret = 0;
3085
3086         printf("Trying to rebuild inode:%llu\n", rec->ino);
3087
3088         type_recovered = !find_file_type(rec, &filetype);
3089
3090         /*
3091          * Try to determine inode type if type not found.
3092          *
3093          * For found regular file extent, it must be FILE.
3094          * For found dir_item/index, it must be DIR.
3095          *
3096          * For undetermined one, use FILE as fallback.
3097          *
3098          * TODO:
3099          * 1. If found backref(inode_index/item is already handled) to it,
3100          *    it must be DIR.
3101          *    Need new inode-inode ref structure to allow search for that.
3102          */
3103         if (!type_recovered) {
3104                 if (rec->found_file_extent &&
3105                     find_normal_file_extent(root, rec->ino)) {
3106                         type_recovered = 1;
3107                         filetype = BTRFS_FT_REG_FILE;
3108                 } else if (rec->found_dir_item) {
3109                         type_recovered = 1;
3110                         filetype = BTRFS_FT_DIR;
3111                 } else if (!list_empty(&rec->orphan_extents)) {
3112                         type_recovered = 1;
3113                         filetype = BTRFS_FT_REG_FILE;
3114                 } else{
3115                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3116                                rec->ino);
3117                         type_recovered = 1;
3118                         filetype = BTRFS_FT_REG_FILE;
3119                 }
3120         }
3121
3122         ret = btrfs_new_inode(trans, root, rec->ino,
3123                               mode | btrfs_type_to_imode(filetype));
3124         if (ret < 0)
3125                 goto out;
3126
3127         /*
3128          * Here inode rebuild is done, we only rebuild the inode item,
3129          * don't repair the nlink(like move to lost+found).
3130          * That is the job of nlink repair.
3131          *
3132          * We just fill the record and return
3133          */
3134         rec->found_dir_item = 1;
3135         rec->imode = mode | btrfs_type_to_imode(filetype);
3136         rec->nlink = 0;
3137         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3138         /* Ensure the inode_nlinks repair function will be called */
3139         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3140 out:
3141         return ret;
3142 }
3143
3144 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3145                                       struct btrfs_root *root,
3146                                       struct btrfs_path *path,
3147                                       struct inode_record *rec)
3148 {
3149         struct orphan_data_extent *orphan;
3150         struct orphan_data_extent *tmp;
3151         int ret = 0;
3152
3153         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3154                 /*
3155                  * Check for conflicting file extents
3156                  *
3157                  * Here we don't know whether the extents is compressed or not,
3158                  * so we can only assume it not compressed nor data offset,
3159                  * and use its disk_len as extent length.
3160                  */
3161                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3162                                        orphan->offset, orphan->disk_len, 0);
3163                 btrfs_release_path(path);
3164                 if (ret < 0)
3165                         goto out;
3166                 if (!ret) {
3167                         fprintf(stderr,
3168                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3169                                 orphan->disk_bytenr, orphan->disk_len);
3170                         ret = btrfs_free_extent(trans,
3171                                         root->fs_info->extent_root,
3172                                         orphan->disk_bytenr, orphan->disk_len,
3173                                         0, root->objectid, orphan->objectid,
3174                                         orphan->offset);
3175                         if (ret < 0)
3176                                 goto out;
3177                 }
3178                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3179                                 orphan->offset, orphan->disk_bytenr,
3180                                 orphan->disk_len, orphan->disk_len);
3181                 if (ret < 0)
3182                         goto out;
3183
3184                 /* Update file size info */
3185                 rec->found_size += orphan->disk_len;
3186                 if (rec->found_size == rec->nbytes)
3187                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3188
3189                 /* Update the file extent hole info too */
3190                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3191                                            orphan->disk_len);
3192                 if (ret < 0)
3193                         goto out;
3194                 if (RB_EMPTY_ROOT(&rec->holes))
3195                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3196
3197                 list_del(&orphan->list);
3198                 free(orphan);
3199         }
3200         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3201 out:
3202         return ret;
3203 }
3204
3205 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3206                                         struct btrfs_root *root,
3207                                         struct btrfs_path *path,
3208                                         struct inode_record *rec)
3209 {
3210         struct rb_node *node;
3211         struct file_extent_hole *hole;
3212         int found = 0;
3213         int ret = 0;
3214
3215         node = rb_first(&rec->holes);
3216
3217         while (node) {
3218                 found = 1;
3219                 hole = rb_entry(node, struct file_extent_hole, node);
3220                 ret = btrfs_punch_hole(trans, root, rec->ino,
3221                                        hole->start, hole->len);
3222                 if (ret < 0)
3223                         goto out;
3224                 ret = del_file_extent_hole(&rec->holes, hole->start,
3225                                            hole->len);
3226                 if (ret < 0)
3227                         goto out;
3228                 if (RB_EMPTY_ROOT(&rec->holes))
3229                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3230                 node = rb_first(&rec->holes);
3231         }
3232         /* special case for a file losing all its file extent */
3233         if (!found) {
3234                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3235                                        round_up(rec->isize, root->sectorsize));
3236                 if (ret < 0)
3237                         goto out;
3238         }
3239         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3240                rec->ino, root->objectid);
3241 out:
3242         return ret;
3243 }
3244
3245 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3246 {
3247         struct btrfs_trans_handle *trans;
3248         struct btrfs_path path;
3249         int ret = 0;
3250
3251         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3252                              I_ERR_NO_ORPHAN_ITEM |
3253                              I_ERR_LINK_COUNT_WRONG |
3254                              I_ERR_NO_INODE_ITEM |
3255                              I_ERR_FILE_EXTENT_ORPHAN |
3256                              I_ERR_FILE_EXTENT_DISCOUNT|
3257                              I_ERR_FILE_NBYTES_WRONG)))
3258                 return rec->errors;
3259
3260         /*
3261          * For nlink repair, it may create a dir and add link, so
3262          * 2 for parent(256)'s dir_index and dir_item
3263          * 2 for lost+found dir's inode_item and inode_ref
3264          * 1 for the new inode_ref of the file
3265          * 2 for lost+found dir's dir_index and dir_item for the file
3266          */
3267         trans = btrfs_start_transaction(root, 7);
3268         if (IS_ERR(trans))
3269                 return PTR_ERR(trans);
3270
3271         btrfs_init_path(&path);
3272         if (rec->errors & I_ERR_NO_INODE_ITEM)
3273                 ret = repair_inode_no_item(trans, root, &path, rec);
3274         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3275                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3276         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3277                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3278         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3279                 ret = repair_inode_isize(trans, root, &path, rec);
3280         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3281                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3282         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3283                 ret = repair_inode_nlinks(trans, root, &path, rec);
3284         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3285                 ret = repair_inode_nbytes(trans, root, &path, rec);
3286         btrfs_commit_transaction(trans, root);
3287         btrfs_release_path(&path);
3288         return ret;
3289 }
3290
/*
 * Verify (and in repair mode try to fix) every inode record collected for
 * @root in @inode_cache.  All records are removed from the cache and freed
 * on the normal path.
 *
 * Returns:
 *   0        no unresolved inode errors were counted, or the root has zero
 *            refs (in which case nothing is checked)
 *   -1       at least one inode error was counted
 *   -EAGAIN  backrefs were repaired or the root dir was recreated
 *   < 0      other error from backref repair or from starting a transaction
 */
static int check_inode_recs(struct btrfs_root *root,
                            struct cache_tree *inode_cache)
{
        struct cache_extent *cache;
        struct ptr_node *node;
        struct inode_record *rec;
        struct inode_backref *backref;
        int stage = 0;
        int ret = 0;
        int err = 0;
        u64 error = 0;
        u64 root_dirid = btrfs_root_dirid(&root->root_item);

        /* A root with zero refs is not checked; only warn about leftovers */
        if (btrfs_root_refs(&root->root_item) == 0) {
                if (!cache_tree_empty(inode_cache))
                        fprintf(stderr, "warning line %d\n", __LINE__);
                return 0;
        }

        /*
         * We need to repair backrefs first because we could change some of the
         * errors in the inode recs.
         *
         * We also need to go through and delete invalid backrefs first and then
         * add the correct ones second.  We do this because we may get EEXIST
         * when adding back the correct index because we hadn't yet deleted the
         * invalid index.
         *
         * For example, if we were missing a dir index then the directories
         * isize would be wrong, so if we fixed the isize to what we thought it
         * would be and then fixed the backref we'd still have a invalid fs, so
         * we need to add back the dir index and then check to see if the isize
         * is still wrong.
         */
        while (stage < 3) {
                stage++;
                /* Stage 3 (drop all records) only runs if a stage set err */
                if (stage == 3 && !err)
                        break;

                cache = search_cache_extent(inode_cache, 0);
                /* The backref stages only do work in repair mode */
                while (repair && cache) {
                        node = container_of(cache, struct ptr_node, cache);
                        rec = node->data;
                        /* Advance first: the current entry may be removed */
                        cache = next_cache_extent(cache);

                        /* Need to free everything up and rescan */
                        if (stage == 3) {
                                remove_cache_extent(inode_cache, &node->cache);
                                free(node);
                                free_inode_rec(rec);
                                continue;
                        }

                        if (list_empty(&rec->backrefs))
                                continue;

                        /* Last argument is non-zero only in stage 1 */
                        ret = repair_inode_backrefs(root, rec, inode_cache,
                                                    stage == 1);
                        if (ret < 0) {
                                err = ret;
                                /* Jump straight to the cleanup stage */
                                stage = 2;
                                break;
                        } if (ret > 0) {
                                /* Something was repaired: request a rescan */
                                err = -EAGAIN;
                        }
                }
        }
        if (err)
                return err;

        /* Look up the record of the root directory itself */
        rec = get_inode_rec(inode_cache, root_dirid, 0);
        BUG_ON(IS_ERR(rec));
        if (rec) {
                ret = check_root_dir(rec);
                if (ret) {
                        fprintf(stderr, "root %llu root dir %llu error\n",
                                (unsigned long long)root->root_key.objectid,
                                (unsigned long long)root_dirid);
                        print_inode_error(root, rec);
                        error++;
                }
        } else {
                if (repair) {
                        struct btrfs_trans_handle *trans;

                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                err = PTR_ERR(trans);
                                return err;
                        }

                        fprintf(stderr,
                                "root %llu missing its root dir, recreating\n",
                                (unsigned long long)root->objectid);

                        ret = btrfs_make_root_dir(trans, root, root_dirid);
                        BUG_ON(ret);

                        btrfs_commit_transaction(trans, root);
                        /* The tree changed: ask the caller to rescan */
                        return -EAGAIN;
                }

                fprintf(stderr, "root %llu root dir %llu not found\n",
                        (unsigned long long)root->root_key.objectid,
                        (unsigned long long)root_dirid);
        }

        /* Drain the cache: every record is checked once and then freed */
        while (1) {
                cache = search_cache_extent(inode_cache, 0);
                if (!cache)
                        break;
                node = container_of(cache, struct ptr_node, cache);
                rec = node->data;
                remove_cache_extent(inode_cache, &node->cache);
                free(node);
                /* The root dir was handled above; orphan objectid is skipped */
                if (rec->ino == root_dirid ||
                    rec->ino == BTRFS_ORPHAN_OBJECTID) {
                        free_inode_rec(rec);
                        continue;
                }

                if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
                        /* Re-check: clear the bit if the lookup returns 0 */
                        ret = check_orphan_item(root, rec->ino);
                        if (ret == 0)
                                rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
                        if (can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                }

                if (!rec->found_inode_item)
                        rec->errors |= I_ERR_NO_INODE_ITEM;
                if (rec->found_link != rec->nlink)
                        rec->errors |= I_ERR_LINK_COUNT_WRONG;
                if (repair) {
                        ret = try_repair_inode(root, rec);
                        if (ret == 0 && can_free_inode_rec(rec)) {
                                free_inode_rec(rec);
                                continue;
                        }
                        /*
                         * NOTE(review): resetting ret here means the test
                         * below never counts an error in repair mode, even
                         * when try_repair_inode() failed - confirm intended.
                         */
                        ret = 0;
                }

                if (!(repair && ret == 0))
                        error++;
                print_inode_error(root, rec);
                /* Flag whatever is still missing on each backref, then dump */
                list_for_each_entry(backref, &rec->backrefs, list) {
                        if (!backref->found_dir_item)
                                backref->errors |= REF_ERR_NO_DIR_ITEM;
                        if (!backref->found_dir_index)
                                backref->errors |= REF_ERR_NO_DIR_INDEX;
                        if (!backref->found_inode_ref)
                                backref->errors |= REF_ERR_NO_INODE_REF;
                        fprintf(stderr, "\tunresolved ref dir %llu index %llu"
                                " namelen %u name %s filetype %d errors %x",
                                (unsigned long long)backref->dir,
                                (unsigned long long)backref->index,
                                backref->namelen, backref->name,
                                backref->filetype, backref->errors);
                        print_ref_error(backref->errors);
                }
                free_inode_rec(rec);
        }
        return (error > 0) ? -1 : 0;
}
3457
3458 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3459                                         u64 objectid)
3460 {
3461         struct cache_extent *cache;
3462         struct root_record *rec = NULL;
3463         int ret;
3464
3465         cache = lookup_cache_extent(root_cache, objectid, 1);
3466         if (cache) {
3467                 rec = container_of(cache, struct root_record, cache);
3468         } else {
3469                 rec = calloc(1, sizeof(*rec));
3470                 if (!rec)
3471                         return ERR_PTR(-ENOMEM);
3472                 rec->objectid = objectid;
3473                 INIT_LIST_HEAD(&rec->backrefs);
3474                 rec->cache.start = objectid;
3475                 rec->cache.size = 1;
3476
3477                 ret = insert_cache_extent(root_cache, &rec->cache);
3478                 if (ret)
3479                         return ERR_PTR(-EEXIST);
3480         }
3481         return rec;
3482 }
3483
3484 static struct root_backref *get_root_backref(struct root_record *rec,
3485                                              u64 ref_root, u64 dir, u64 index,
3486                                              const char *name, int namelen)
3487 {
3488         struct root_backref *backref;
3489
3490         list_for_each_entry(backref, &rec->backrefs, list) {
3491                 if (backref->ref_root != ref_root || backref->dir != dir ||
3492                     backref->namelen != namelen)
3493                         continue;
3494                 if (memcmp(name, backref->name, namelen))
3495                         continue;
3496                 return backref;
3497         }
3498
3499         backref = calloc(1, sizeof(*backref) + namelen + 1);
3500         if (!backref)
3501                 return NULL;
3502         backref->ref_root = ref_root;
3503         backref->dir = dir;
3504         backref->index = index;
3505         backref->namelen = namelen;
3506         memcpy(backref->name, name, namelen);
3507         backref->name[namelen] = '\0';
3508         list_add_tail(&backref->list, &rec->backrefs);
3509         return backref;
3510 }
3511
3512 static void free_root_record(struct cache_extent *cache)
3513 {
3514         struct root_record *rec;
3515         struct root_backref *backref;
3516
3517         rec = container_of(cache, struct root_record, cache);
3518         while (!list_empty(&rec->backrefs)) {
3519                 backref = to_root_backref(rec->backrefs.next);
3520                 list_del(&backref->list);
3521                 free(backref);
3522         }
3523
3524         free(rec);
3525 }
3526
/*
 * Instantiate the cache-tree teardown helper for root records.
 * NOTE(review): the macro is defined outside this section; presumably it
 * generates a free_root_recs_tree() that calls free_root_record() on every
 * entry - confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3528
3529 static int add_root_backref(struct cache_tree *root_cache,
3530                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3531                             const char *name, int namelen,
3532                             int item_type, int errors)
3533 {
3534         struct root_record *rec;
3535         struct root_backref *backref;
3536
3537         rec = get_root_rec(root_cache, root_id);
3538         BUG_ON(IS_ERR(rec));
3539         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3540         BUG_ON(!backref);
3541
3542         backref->errors |= errors;
3543
3544         if (item_type != BTRFS_DIR_ITEM_KEY) {
3545                 if (backref->found_dir_index || backref->found_back_ref ||
3546                     backref->found_forward_ref) {
3547                         if (backref->index != index)
3548                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3549                 } else {
3550                         backref->index = index;
3551                 }
3552         }
3553
3554         if (item_type == BTRFS_DIR_ITEM_KEY) {
3555                 if (backref->found_forward_ref)
3556                         rec->found_ref++;
3557                 backref->found_dir_item = 1;
3558         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3559                 backref->found_dir_index = 1;
3560         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3561                 if (backref->found_forward_ref)
3562                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3563                 else if (backref->found_dir_item)
3564                         rec->found_ref++;
3565                 backref->found_forward_ref = 1;
3566         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3567                 if (backref->found_back_ref)
3568                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3569                 backref->found_back_ref = 1;
3570         } else {
3571                 BUG_ON(1);
3572         }
3573
3574         if (backref->found_forward_ref && backref->found_dir_item)
3575                 backref->reachable = 1;
3576         return 0;
3577 }
3578
3579 static int merge_root_recs(struct btrfs_root *root,
3580                            struct cache_tree *src_cache,
3581                            struct cache_tree *dst_cache)
3582 {
3583         struct cache_extent *cache;
3584         struct ptr_node *node;
3585         struct inode_record *rec;
3586         struct inode_backref *backref;
3587         int ret = 0;
3588
3589         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3590                 free_inode_recs_tree(src_cache);
3591                 return 0;
3592         }
3593
3594         while (1) {
3595                 cache = search_cache_extent(src_cache, 0);
3596                 if (!cache)
3597                         break;
3598                 node = container_of(cache, struct ptr_node, cache);
3599                 rec = node->data;
3600                 remove_cache_extent(src_cache, &node->cache);
3601                 free(node);
3602
3603                 ret = is_child_root(root, root->objectid, rec->ino);
3604                 if (ret < 0)
3605                         break;
3606                 else if (ret == 0)
3607                         goto skip;
3608
3609                 list_for_each_entry(backref, &rec->backrefs, list) {
3610                         BUG_ON(backref->found_inode_ref);
3611                         if (backref->found_dir_item)
3612                                 add_root_backref(dst_cache, rec->ino,
3613                                         root->root_key.objectid, backref->dir,
3614                                         backref->index, backref->name,
3615                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3616                                         backref->errors);
3617                         if (backref->found_dir_index)
3618                                 add_root_backref(dst_cache, rec->ino,
3619                                         root->root_key.objectid, backref->dir,
3620                                         backref->index, backref->name,
3621                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3622                                         backref->errors);
3623                 }
3624 skip:
3625                 free_inode_rec(rec);
3626         }
3627         if (ret < 0)
3628                 return ret;
3629         return 0;
3630 }
3631
3632 static int check_root_refs(struct btrfs_root *root,
3633                            struct cache_tree *root_cache)
3634 {
3635         struct root_record *rec;
3636         struct root_record *ref_root;
3637         struct root_backref *backref;
3638         struct cache_extent *cache;
3639         int loop = 1;
3640         int ret;
3641         int error;
3642         int errors = 0;
3643
3644         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3645         BUG_ON(IS_ERR(rec));
3646         rec->found_ref = 1;
3647
3648         /* fixme: this can not detect circular references */
3649         while (loop) {
3650                 loop = 0;
3651                 cache = search_cache_extent(root_cache, 0);
3652                 while (1) {
3653                         if (!cache)
3654                                 break;
3655                         rec = container_of(cache, struct root_record, cache);
3656                         cache = next_cache_extent(cache);
3657
3658                         if (rec->found_ref == 0)
3659                                 continue;
3660
3661                         list_for_each_entry(backref, &rec->backrefs, list) {
3662                                 if (!backref->reachable)
3663                                         continue;
3664
3665                                 ref_root = get_root_rec(root_cache,
3666                                                         backref->ref_root);
3667                                 BUG_ON(IS_ERR(ref_root));
3668                                 if (ref_root->found_ref > 0)
3669                                         continue;
3670
3671                                 backref->reachable = 0;
3672                                 rec->found_ref--;
3673                                 if (rec->found_ref == 0)
3674                                         loop = 1;
3675                         }
3676                 }
3677         }
3678
3679         cache = search_cache_extent(root_cache, 0);
3680         while (1) {
3681                 if (!cache)
3682                         break;
3683                 rec = container_of(cache, struct root_record, cache);
3684                 cache = next_cache_extent(cache);
3685
3686                 if (rec->found_ref == 0 &&
3687                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3688                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3689                         ret = check_orphan_item(root->fs_info->tree_root,
3690                                                 rec->objectid);
3691                         if (ret == 0)
3692                                 continue;
3693
3694                         /*
3695                          * If we don't have a root item then we likely just have
3696                          * a dir item in a snapshot for this root but no actual
3697                          * ref key or anything so it's meaningless.
3698                          */
3699                         if (!rec->found_root_item)
3700                                 continue;
3701                         errors++;
3702                         fprintf(stderr, "fs tree %llu not referenced\n",
3703                                 (unsigned long long)rec->objectid);
3704                 }
3705
3706                 error = 0;
3707                 if (rec->found_ref > 0 && !rec->found_root_item)
3708                         error = 1;
3709                 list_for_each_entry(backref, &rec->backrefs, list) {
3710                         if (!backref->found_dir_item)
3711                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3712                         if (!backref->found_dir_index)
3713                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3714                         if (!backref->found_back_ref)
3715                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3716                         if (!backref->found_forward_ref)
3717                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3718                         if (backref->reachable && backref->errors)
3719                                 error = 1;
3720                 }
3721                 if (!error)
3722                         continue;
3723
3724                 errors++;
3725                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3726                         (unsigned long long)rec->objectid, rec->found_ref,
3727                          rec->found_root_item ? "" : "not found");
3728
3729                 list_for_each_entry(backref, &rec->backrefs, list) {
3730                         if (!backref->reachable)
3731                                 continue;
3732                         if (!backref->errors && rec->found_root_item)
3733                                 continue;
3734                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3735                                 " index %llu namelen %u name %s errors %x\n",
3736                                 (unsigned long long)backref->ref_root,
3737                                 (unsigned long long)backref->dir,
3738                                 (unsigned long long)backref->index,
3739                                 backref->namelen, backref->name,
3740                                 backref->errors);
3741                         print_ref_error(backref->errors);
3742                 }
3743         }
3744         return errors > 0 ? 1 : 0;
3745 }
3746
3747 static int process_root_ref(struct extent_buffer *eb, int slot,
3748                             struct btrfs_key *key,
3749                             struct cache_tree *root_cache)
3750 {
3751         u64 dirid;
3752         u64 index;
3753         u32 len;
3754         u32 name_len;
3755         struct btrfs_root_ref *ref;
3756         char namebuf[BTRFS_NAME_LEN];
3757         int error;
3758
3759         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3760
3761         dirid = btrfs_root_ref_dirid(eb, ref);
3762         index = btrfs_root_ref_sequence(eb, ref);
3763         name_len = btrfs_root_ref_name_len(eb, ref);
3764
3765         if (name_len <= BTRFS_NAME_LEN) {
3766                 len = name_len;
3767                 error = 0;
3768         } else {
3769                 len = BTRFS_NAME_LEN;
3770                 error = REF_ERR_NAME_TOO_LONG;
3771         }
3772         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3773
3774         if (key->type == BTRFS_ROOT_REF_KEY) {
3775                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3776                                  index, namebuf, len, key->type, error);
3777         } else {
3778                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3779                                  index, namebuf, len, key->type, error);
3780         }
3781         return 0;
3782 }
3783
3784 static void free_corrupt_block(struct cache_extent *cache)
3785 {
3786         struct btrfs_corrupt_block *corrupt;
3787
3788         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3789         free(corrupt);
3790 }
3791
/*
 * Instantiate the cache-tree teardown helper for corrupt block records.
 * NOTE(review): the macro is defined outside this section; presumably it
 * generates a free_corrupt_blocks_tree() that calls free_corrupt_block() on
 * every entry - confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3793
3794 /*
3795  * Repair the btree of the given root.
3796  *
3797  * The fix is to remove the node key in corrupt_blocks cache_tree.
3798  * and rebalance the tree.
3799  * After the fix, the btree should be writeable.
3800  */
3801 static int repair_btree(struct btrfs_root *root,
3802                         struct cache_tree *corrupt_blocks)
3803 {
3804         struct btrfs_trans_handle *trans;
3805         struct btrfs_path path;
3806         struct btrfs_corrupt_block *corrupt;
3807         struct cache_extent *cache;
3808         struct btrfs_key key;
3809         u64 offset;
3810         int level;
3811         int ret = 0;
3812
3813         if (cache_tree_empty(corrupt_blocks))
3814                 return 0;
3815
3816         trans = btrfs_start_transaction(root, 1);
3817         if (IS_ERR(trans)) {
3818                 ret = PTR_ERR(trans);
3819                 fprintf(stderr, "Error starting transaction: %s\n",
3820                         strerror(-ret));
3821                 return ret;
3822         }
3823         btrfs_init_path(&path);
3824         cache = first_cache_extent(corrupt_blocks);
3825         while (cache) {
3826                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3827                                        cache);
3828                 level = corrupt->level;
3829                 path.lowest_level = level;
3830                 key.objectid = corrupt->key.objectid;
3831                 key.type = corrupt->key.type;
3832                 key.offset = corrupt->key.offset;
3833
3834                 /*
3835                  * Here we don't want to do any tree balance, since it may
3836                  * cause a balance with corrupted brother leaf/node,
3837                  * so ins_len set to 0 here.
3838                  * Balance will be done after all corrupt node/leaf is deleted.
3839                  */
3840                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3841                 if (ret < 0)
3842                         goto out;
3843                 offset = btrfs_node_blockptr(path.nodes[level],
3844                                              path.slots[level]);
3845
3846                 /* Remove the ptr */
3847                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3848                 if (ret < 0)
3849                         goto out;
3850                 /*
3851                  * Remove the corresponding extent
3852                  * return value is not concerned.
3853                  */
3854                 btrfs_release_path(&path);
3855                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3856                                         0, root->root_key.objectid,
3857                                         level - 1, 0);
3858                 cache = next_cache_extent(cache);
3859         }
3860
3861         /* Balance the btree using btrfs_search_slot() */
3862         cache = first_cache_extent(corrupt_blocks);
3863         while (cache) {
3864                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3865                                        cache);
3866                 memcpy(&key, &corrupt->key, sizeof(key));
3867                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3868                 if (ret < 0)
3869                         goto out;
3870                 /* return will always >0 since it won't find the item */
3871                 ret = 0;
3872                 btrfs_release_path(&path);
3873                 cache = next_cache_extent(cache);
3874         }
3875 out:
3876         btrfs_commit_transaction(trans, root);
3877         btrfs_release_path(&path);
3878         return ret;
3879 }
3880
3881 static int check_fs_root(struct btrfs_root *root,
3882                          struct cache_tree *root_cache,
3883                          struct walk_control *wc)
3884 {
3885         int ret = 0;
3886         int err = 0;
3887         int wret;
3888         int level;
3889         struct btrfs_path path;
3890         struct shared_node root_node;
3891         struct root_record *rec;
3892         struct btrfs_root_item *root_item = &root->root_item;
3893         struct cache_tree corrupt_blocks;
3894         struct orphan_data_extent *orphan;
3895         struct orphan_data_extent *tmp;
3896         enum btrfs_tree_block_status status;
3897         struct node_refs nrefs;
3898
3899         /*
3900          * Reuse the corrupt_block cache tree to record corrupted tree block
3901          *
3902          * Unlike the usage in extent tree check, here we do it in a per
3903          * fs/subvol tree base.
3904          */
3905         cache_tree_init(&corrupt_blocks);
3906         root->fs_info->corrupt_blocks = &corrupt_blocks;
3907
3908         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3909                 rec = get_root_rec(root_cache, root->root_key.objectid);
3910                 BUG_ON(IS_ERR(rec));
3911                 if (btrfs_root_refs(root_item) > 0)
3912                         rec->found_root_item = 1;
3913         }
3914
3915         btrfs_init_path(&path);
3916         memset(&root_node, 0, sizeof(root_node));
3917         cache_tree_init(&root_node.root_cache);
3918         cache_tree_init(&root_node.inode_cache);
3919         memset(&nrefs, 0, sizeof(nrefs));
3920
3921         /* Move the orphan extent record to corresponding inode_record */
3922         list_for_each_entry_safe(orphan, tmp,
3923                                  &root->orphan_data_extents, list) {
3924                 struct inode_record *inode;
3925
3926                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3927                                       1);
3928                 BUG_ON(IS_ERR(inode));
3929                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3930                 list_move(&orphan->list, &inode->orphan_extents);
3931         }
3932
3933         level = btrfs_header_level(root->node);
3934         memset(wc->nodes, 0, sizeof(wc->nodes));
3935         wc->nodes[level] = &root_node;
3936         wc->active_node = level;
3937         wc->root_level = level;
3938
3939         /* We may not have checked the root block, lets do that now */
3940         if (btrfs_is_leaf(root->node))
3941                 status = btrfs_check_leaf(root, NULL, root->node);
3942         else
3943                 status = btrfs_check_node(root, NULL, root->node);
3944         if (status != BTRFS_TREE_BLOCK_CLEAN)
3945                 return -EIO;
3946
3947         if (btrfs_root_refs(root_item) > 0 ||
3948             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3949                 path.nodes[level] = root->node;
3950                 extent_buffer_get(root->node);
3951                 path.slots[level] = 0;
3952         } else {
3953                 struct btrfs_key key;
3954                 struct btrfs_disk_key found_key;
3955
3956                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3957                 level = root_item->drop_level;
3958                 path.lowest_level = level;
3959                 if (level > btrfs_header_level(root->node) ||
3960                     level >= BTRFS_MAX_LEVEL) {
3961                         error("ignoring invalid drop level: %u", level);
3962                         goto skip_walking;
3963                 }
3964                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3965                 if (wret < 0)
3966                         goto skip_walking;
3967                 btrfs_node_key(path.nodes[level], &found_key,
3968                                 path.slots[level]);
3969                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3970                                         sizeof(found_key)));
3971         }
3972
3973         while (1) {
3974                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3975                 if (wret < 0)
3976                         ret = wret;
3977                 if (wret != 0)
3978                         break;
3979
3980                 wret = walk_up_tree(root, &path, wc, &level);
3981                 if (wret < 0)
3982                         ret = wret;
3983                 if (wret != 0)
3984                         break;
3985         }
3986 skip_walking:
3987         btrfs_release_path(&path);
3988
3989         if (!cache_tree_empty(&corrupt_blocks)) {
3990                 struct cache_extent *cache;
3991                 struct btrfs_corrupt_block *corrupt;
3992
3993                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3994                        root->root_key.objectid);
3995                 cache = first_cache_extent(&corrupt_blocks);
3996                 while (cache) {
3997                         corrupt = container_of(cache,
3998                                                struct btrfs_corrupt_block,
3999                                                cache);
4000                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4001                                cache->start, corrupt->level,
4002                                corrupt->key.objectid, corrupt->key.type,
4003                                corrupt->key.offset);
4004                         cache = next_cache_extent(cache);
4005                 }
4006                 if (repair) {
4007                         printf("Try to repair the btree for root %llu\n",
4008                                root->root_key.objectid);
4009                         ret = repair_btree(root, &corrupt_blocks);
4010                         if (ret < 0)
4011                                 fprintf(stderr, "Failed to repair btree: %s\n",
4012                                         strerror(-ret));
4013                         if (!ret)
4014                                 printf("Btree for root %llu is fixed\n",
4015                                        root->root_key.objectid);
4016                 }
4017         }
4018
4019         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4020         if (err < 0)
4021                 ret = err;
4022
4023         if (root_node.current) {
4024                 root_node.current->checked = 1;
4025                 maybe_free_inode_rec(&root_node.inode_cache,
4026                                 root_node.current);
4027         }
4028
4029         err = check_inode_recs(root, &root_node.inode_cache);
4030         if (!ret)
4031                 ret = err;
4032
4033         free_corrupt_blocks_tree(&corrupt_blocks);
4034         root->fs_info->corrupt_blocks = NULL;
4035         free_orphan_data_extents(&root->orphan_data_extents);
4036         return ret;
4037 }
4038
4039 static int fs_root_objectid(u64 objectid)
4040 {
4041         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4042             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4043                 return 1;
4044         return is_fstree(objectid);
4045 }
4046
4047 static int check_fs_roots(struct btrfs_root *root,
4048                           struct cache_tree *root_cache)
4049 {
4050         struct btrfs_path path;
4051         struct btrfs_key key;
4052         struct walk_control wc;
4053         struct extent_buffer *leaf, *tree_node;
4054         struct btrfs_root *tmp_root;
4055         struct btrfs_root *tree_root = root->fs_info->tree_root;
4056         int ret;
4057         int err = 0;
4058
4059         if (ctx.progress_enabled) {
4060                 ctx.tp = TASK_FS_ROOTS;
4061                 task_start(ctx.info);
4062         }
4063
4064         /*
4065          * Just in case we made any changes to the extent tree that weren't
4066          * reflected into the free space cache yet.
4067          */
4068         if (repair)
4069                 reset_cached_block_groups(root->fs_info);
4070         memset(&wc, 0, sizeof(wc));
4071         cache_tree_init(&wc.shared);
4072         btrfs_init_path(&path);
4073
4074 again:
4075         key.offset = 0;
4076         key.objectid = 0;
4077         key.type = BTRFS_ROOT_ITEM_KEY;
4078         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4079         if (ret < 0) {
4080                 err = 1;
4081                 goto out;
4082         }
4083         tree_node = tree_root->node;
4084         while (1) {
4085                 if (tree_node != tree_root->node) {
4086                         free_root_recs_tree(root_cache);
4087                         btrfs_release_path(&path);
4088                         goto again;
4089                 }
4090                 leaf = path.nodes[0];
4091                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4092                         ret = btrfs_next_leaf(tree_root, &path);
4093                         if (ret) {
4094                                 if (ret < 0)
4095                                         err = 1;
4096                                 break;
4097                         }
4098                         leaf = path.nodes[0];
4099                 }
4100                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4101                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4102                     fs_root_objectid(key.objectid)) {
4103                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4104                                 tmp_root = btrfs_read_fs_root_no_cache(
4105                                                 root->fs_info, &key);
4106                         } else {
4107                                 key.offset = (u64)-1;
4108                                 tmp_root = btrfs_read_fs_root(
4109                                                 root->fs_info, &key);
4110                         }
4111                         if (IS_ERR(tmp_root)) {
4112                                 err = 1;
4113                                 goto next;
4114                         }
4115                         ret = check_fs_root(tmp_root, root_cache, &wc);
4116                         if (ret == -EAGAIN) {
4117                                 free_root_recs_tree(root_cache);
4118                                 btrfs_release_path(&path);
4119                                 goto again;
4120                         }
4121                         if (ret)
4122                                 err = 1;
4123                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4124                                 btrfs_free_fs_root(tmp_root);
4125                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4126                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4127                         process_root_ref(leaf, path.slots[0], &key,
4128                                          root_cache);
4129                 }
4130 next:
4131                 path.slots[0]++;
4132         }
4133 out:
4134         btrfs_release_path(&path);
4135         if (err)
4136                 free_extent_cache_tree(&wc.shared);
4137         if (!cache_tree_empty(&wc.shared))
4138                 fprintf(stderr, "warning line %d\n", __LINE__);
4139
4140         task_stop(ctx.info);
4141
4142         return err;
4143 }
4144
4145 /*
4146  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4147  * INODE_REF/INODE_EXTREF match.
4148  *
4149  * @root:       the root of the fs/file tree
4150  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4151  * @key:        the key of the DIR_ITEM/DIR_INDEX
4152  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4153  *              distinguish root_dir between normal dir/file
4154  * @name:       the name in the INODE_REF/INODE_EXTREF
4155  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4156  * @mode:       the st_mode of INODE_ITEM
4157  *
4158  * Return 0 if no error occurred.
4159  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4160  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4161  * dir/file.
4162  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4163  * not match for normal dir/file.
4164  */
4165 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4166                          struct btrfs_key *key, u64 index, char *name,
4167                          u32 namelen, u32 mode)
4168 {
4169         struct btrfs_path path;
4170         struct extent_buffer *node;
4171         struct btrfs_dir_item *di;
4172         struct btrfs_key location;
4173         char namebuf[BTRFS_NAME_LEN] = {0};
4174         u32 total;
4175         u32 cur = 0;
4176         u32 len;
4177         u32 name_len;
4178         u32 data_len;
4179         u8 filetype;
4180         int slot;
4181         int ret;
4182
4183         btrfs_init_path(&path);
4184         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4185         if (ret < 0) {
4186                 ret = DIR_ITEM_MISSING;
4187                 goto out;
4188         }
4189
4190         /* Process root dir and goto out*/
4191         if (index == 0) {
4192                 if (ret == 0) {
4193                         ret = ROOT_DIR_ERROR;
4194                         error(
4195                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4196                                 root->objectid,
4197                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4198                                         "REF" : "EXTREF",
4199                                 ref_key->objectid, ref_key->offset,
4200                                 key->type == BTRFS_DIR_ITEM_KEY ?
4201                                         "DIR_ITEM" : "DIR_INDEX");
4202                 } else {
4203                         ret = 0;
4204                 }
4205
4206                 goto out;
4207         }
4208
4209         /* Process normal file/dir */
4210         if (ret > 0) {
4211                 ret = DIR_ITEM_MISSING;
4212                 error(
4213                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4214                         root->objectid,
4215                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4216                         ref_key->objectid, ref_key->offset,
4217                         key->type == BTRFS_DIR_ITEM_KEY ?
4218                                 "DIR_ITEM" : "DIR_INDEX",
4219                         key->objectid, key->offset, namelen, name,
4220                         imode_to_type(mode));
4221                 goto out;
4222         }
4223
4224         /* Check whether inode_id/filetype/name match */
4225         node = path.nodes[0];
4226         slot = path.slots[0];
4227         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4228         total = btrfs_item_size_nr(node, slot);
4229         while (cur < total) {
4230                 ret = DIR_ITEM_MISMATCH;
4231                 name_len = btrfs_dir_name_len(node, di);
4232                 data_len = btrfs_dir_data_len(node, di);
4233
4234                 btrfs_dir_item_key_to_cpu(node, di, &location);
4235                 if (location.objectid != ref_key->objectid ||
4236                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4237                     location.offset != 0)
4238                         goto next;
4239
4240                 filetype = btrfs_dir_type(node, di);
4241                 if (imode_to_type(mode) != filetype)
4242                         goto next;
4243
4244                 if (cur + sizeof(*di) + name_len > total ||
4245                     name_len > BTRFS_NAME_LEN) {
4246                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4247                                 root->objectid,
4248                                 key->type == BTRFS_DIR_ITEM_KEY ?
4249                                 "DIR_ITEM" : "DIR_INDEX",
4250                                 key->objectid, key->offset, name_len);
4251
4252                         if (cur + sizeof(*di) > total)
4253                                 break;
4254                         len = min_t(u32, total - cur - sizeof(*di),
4255                                     BTRFS_NAME_LEN);
4256                 } else {
4257                         len = name_len;
4258                 }
4259
4260                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4261                 if (len != namelen || strncmp(namebuf, name, len))
4262                         goto next;
4263
4264                 ret = 0;
4265                 goto out;
4266 next:
4267                 len = sizeof(*di) + name_len + data_len;
4268                 di = (struct btrfs_dir_item *)((char *)di + len);
4269                 cur += len;
4270         }
4271         if (ret == DIR_ITEM_MISMATCH)
4272                 error(
4273                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4274                         root->objectid,
4275                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4276                         ref_key->objectid, ref_key->offset,
4277                         key->type == BTRFS_DIR_ITEM_KEY ?
4278                                 "DIR_ITEM" : "DIR_INDEX",
4279                         key->objectid, key->offset, namelen, name,
4280                         imode_to_type(mode));
4281 out:
4282         btrfs_release_path(&path);
4283         return ret;
4284 }
4285
4286 /*
4287  * Traverse the given INODE_REF and call find_dir_item() to find related
4288  * DIR_ITEM/DIR_INDEX.
4289  *
4290  * @root:       the root of the fs/file tree
4291  * @ref_key:    the key of the INODE_REF
4292  * @refs:       the count of INODE_REF
4293  * @mode:       the st_mode of INODE_ITEM
4294  *
4295  * Return 0 if no error occurred.
4296  */
4297 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4298                            struct extent_buffer *node, int slot, u64 *refs,
4299                            int mode)
4300 {
4301         struct btrfs_key key;
4302         struct btrfs_inode_ref *ref;
4303         char namebuf[BTRFS_NAME_LEN] = {0};
4304         u32 total;
4305         u32 cur = 0;
4306         u32 len;
4307         u32 name_len;
4308         u64 index;
4309         int ret, err = 0;
4310
4311         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4312         total = btrfs_item_size_nr(node, slot);
4313
4314 next:
4315         /* Update inode ref count */
4316         (*refs)++;
4317
4318         index = btrfs_inode_ref_index(node, ref);
4319         name_len = btrfs_inode_ref_name_len(node, ref);
4320         if (cur + sizeof(*ref) + name_len > total ||
4321             name_len > BTRFS_NAME_LEN) {
4322                 warning("root %llu INODE_REF[%llu %llu] name too long",
4323                         root->objectid, ref_key->objectid, ref_key->offset);
4324
4325                 if (total < cur + sizeof(*ref))
4326                         goto out;
4327                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4328         } else {
4329                 len = name_len;
4330         }
4331
4332         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4333
4334         /* Check root dir ref name */
4335         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4336                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4337                       root->objectid, ref_key->objectid, ref_key->offset,
4338                       namebuf);
4339                 err |= ROOT_DIR_ERROR;
4340         }
4341
4342         /* Find related DIR_INDEX */
4343         key.objectid = ref_key->offset;
4344         key.type = BTRFS_DIR_INDEX_KEY;
4345         key.offset = index;
4346         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4347         err |= ret;
4348
4349         /* Find related dir_item */
4350         key.objectid = ref_key->offset;
4351         key.type = BTRFS_DIR_ITEM_KEY;
4352         key.offset = btrfs_name_hash(namebuf, len);
4353         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4354         err |= ret;
4355
4356         len = sizeof(*ref) + name_len;
4357         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4358         cur += len;
4359         if (cur < total)
4360                 goto next;
4361
4362 out:
4363         return err;
4364 }
4365
4366 /*
4367  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4368  * DIR_ITEM/DIR_INDEX.
4369  *
4370  * @root:       the root of the fs/file tree
4371  * @ref_key:    the key of the INODE_EXTREF
4372  * @refs:       the count of INODE_EXTREF
4373  * @mode:       the st_mode of INODE_ITEM
4374  *
4375  * Return 0 if no error occurred.
4376  */
4377 static int check_inode_extref(struct btrfs_root *root,
4378                               struct btrfs_key *ref_key,
4379                               struct extent_buffer *node, int slot, u64 *refs,
4380                               int mode)
4381 {
4382         struct btrfs_key key;
4383         struct btrfs_inode_extref *extref;
4384         char namebuf[BTRFS_NAME_LEN] = {0};
4385         u32 total;
4386         u32 cur = 0;
4387         u32 len;
4388         u32 name_len;
4389         u64 index;
4390         u64 parent;
4391         int ret;
4392         int err = 0;
4393
4394         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4395         total = btrfs_item_size_nr(node, slot);
4396
4397 next:
4398         /* update inode ref count */
4399         (*refs)++;
4400         name_len = btrfs_inode_extref_name_len(node, extref);
4401         index = btrfs_inode_extref_index(node, extref);
4402         parent = btrfs_inode_extref_parent(node, extref);
4403         if (name_len <= BTRFS_NAME_LEN) {
4404                 len = name_len;
4405         } else {
4406                 len = BTRFS_NAME_LEN;
4407                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4408                         root->objectid, ref_key->objectid, ref_key->offset);
4409         }
4410         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4411
4412         /* Check root dir ref name */
4413         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4414                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4415                       root->objectid, ref_key->objectid, ref_key->offset,
4416                       namebuf);
4417                 err |= ROOT_DIR_ERROR;
4418         }
4419
4420         /* find related dir_index */
4421         key.objectid = parent;
4422         key.type = BTRFS_DIR_INDEX_KEY;
4423         key.offset = index;
4424         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4425         err |= ret;
4426
4427         /* find related dir_item */
4428         key.objectid = parent;
4429         key.type = BTRFS_DIR_ITEM_KEY;
4430         key.offset = btrfs_name_hash(namebuf, len);
4431         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4432         err |= ret;
4433
4434         len = sizeof(*extref) + name_len;
4435         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4436         cur += len;
4437
4438         if (cur < total)
4439                 goto next;
4440
4441         return err;
4442 }
4443
4444 /*
4445  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4446  * DIR_ITEM/DIR_INDEX match.
4447  *
4448  * @root:       the root of the fs/file tree
4449  * @key:        the key of the INODE_REF/INODE_EXTREF
4450  * @name:       the name in the INODE_REF/INODE_EXTREF
4451  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4452  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4453  * to (u64)-1
4454  * @ext_ref:    the EXTENDED_IREF feature
4455  *
4456  * Return 0 if no error occurred.
4457  * Return >0 for error bitmap
4458  */
4459 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4460                           char *name, int namelen, u64 index,
4461                           unsigned int ext_ref)
4462 {
4463         struct btrfs_path path;
4464         struct btrfs_inode_ref *ref;
4465         struct btrfs_inode_extref *extref;
4466         struct extent_buffer *node;
4467         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4468         u32 total;
4469         u32 cur = 0;
4470         u32 len;
4471         u32 ref_namelen;
4472         u64 ref_index;
4473         u64 parent;
4474         u64 dir_id;
4475         int slot;
4476         int ret;
4477
4478         btrfs_init_path(&path);
4479         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4480         if (ret) {
4481                 ret = INODE_REF_MISSING;
4482                 goto extref;
4483         }
4484
4485         node = path.nodes[0];
4486         slot = path.slots[0];
4487
4488         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4489         total = btrfs_item_size_nr(node, slot);
4490
4491         /* Iterate all entry of INODE_REF */
4492         while (cur < total) {
4493                 ret = INODE_REF_MISSING;
4494
4495                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4496                 ref_index = btrfs_inode_ref_index(node, ref);
4497                 if (index != (u64)-1 && index != ref_index)
4498                         goto next_ref;
4499
4500                 if (cur + sizeof(*ref) + ref_namelen > total ||
4501                     ref_namelen > BTRFS_NAME_LEN) {
4502                         warning("root %llu INODE %s[%llu %llu] name too long",
4503                                 root->objectid,
4504                                 key->type == BTRFS_INODE_REF_KEY ?
4505                                         "REF" : "EXTREF",
4506                                 key->objectid, key->offset);
4507
4508                         if (cur + sizeof(*ref) > total)
4509                                 break;
4510                         len = min_t(u32, total - cur - sizeof(*ref),
4511                                     BTRFS_NAME_LEN);
4512                 } else {
4513                         len = ref_namelen;
4514                 }
4515
4516                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4517                                    len);
4518
4519                 if (len != namelen || strncmp(ref_namebuf, name, len))
4520                         goto next_ref;
4521
4522                 ret = 0;
4523                 goto out;
4524 next_ref:
4525                 len = sizeof(*ref) + ref_namelen;
4526                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4527                 cur += len;
4528         }
4529
4530 extref:
4531         /* Skip if not support EXTENDED_IREF feature */
4532         if (!ext_ref)
4533                 goto out;
4534
4535         btrfs_release_path(&path);
4536         btrfs_init_path(&path);
4537
4538         dir_id = key->offset;
4539         key->type = BTRFS_INODE_EXTREF_KEY;
4540         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4541
4542         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4543         if (ret) {
4544                 ret = INODE_REF_MISSING;
4545                 goto out;
4546         }
4547
4548         node = path.nodes[0];
4549         slot = path.slots[0];
4550
4551         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4552         cur = 0;
4553         total = btrfs_item_size_nr(node, slot);
4554
4555         /* Iterate all entry of INODE_EXTREF */
4556         while (cur < total) {
4557                 ret = INODE_REF_MISSING;
4558
4559                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4560                 ref_index = btrfs_inode_extref_index(node, extref);
4561                 parent = btrfs_inode_extref_parent(node, extref);
4562                 if (index != (u64)-1 && index != ref_index)
4563                         goto next_extref;
4564
4565                 if (parent != dir_id)
4566                         goto next_extref;
4567
4568                 if (ref_namelen <= BTRFS_NAME_LEN) {
4569                         len = ref_namelen;
4570                 } else {
4571                         len = BTRFS_NAME_LEN;
4572                         warning("root %llu INODE %s[%llu %llu] name too long",
4573                                 root->objectid,
4574                                 key->type == BTRFS_INODE_REF_KEY ?
4575                                         "REF" : "EXTREF",
4576                                 key->objectid, key->offset);
4577                 }
4578                 read_extent_buffer(node, ref_namebuf,
4579                                    (unsigned long)(extref + 1), len);
4580
4581                 if (len != namelen || strncmp(ref_namebuf, name, len))
4582                         goto next_extref;
4583
4584                 ret = 0;
4585                 goto out;
4586
4587 next_extref:
4588                 len = sizeof(*extref) + ref_namelen;
4589                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4590                 cur += len;
4591
4592         }
4593 out:
4594         btrfs_release_path(&path);
4595         return ret;
4596 }
4597
4598 /*
4599  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4600  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4601  *
4602  * @root:       the root of the fs/file tree
4603  * @key:        the key of the INODE_REF/INODE_EXTREF
4604  * @size:       the st_size of the INODE_ITEM
4605  * @ext_ref:    the EXTENDED_IREF feature
4606  *
4607  * Return 0 if no error occurred.
4608  */
4609 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4610                           struct extent_buffer *node, int slot, u64 *size,
4611                           unsigned int ext_ref)
4612 {
4613         struct btrfs_dir_item *di;
4614         struct btrfs_inode_item *ii;
4615         struct btrfs_path path;
4616         struct btrfs_key location;
4617         char namebuf[BTRFS_NAME_LEN] = {0};
4618         u32 total;
4619         u32 cur = 0;
4620         u32 len;
4621         u32 name_len;
4622         u32 data_len;
4623         u8 filetype;
4624         u32 mode;
4625         u64 index;
4626         int ret;
4627         int err = 0;
4628
4629         /*
4630          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4631          * ignore index check.
4632          */
4633         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4634
4635         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4636         total = btrfs_item_size_nr(node, slot);
4637
4638         while (cur < total) {
4639                 data_len = btrfs_dir_data_len(node, di);
4640                 if (data_len)
4641                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4642                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4643                               "DIR_ITEM" : "DIR_INDEX",
4644                               key->objectid, key->offset, data_len);
4645
4646                 name_len = btrfs_dir_name_len(node, di);
4647                 if (cur + sizeof(*di) + name_len > total ||
4648                     name_len > BTRFS_NAME_LEN) {
4649                         warning("root %llu %s[%llu %llu] name too long",
4650                                 root->objectid,
4651                                 key->type == BTRFS_DIR_ITEM_KEY ?
4652                                 "DIR_ITEM" : "DIR_INDEX",
4653                                 key->objectid, key->offset);
4654
4655                         if (cur + sizeof(*di) > total)
4656                                 break;
4657                         len = min_t(u32, total - cur - sizeof(*di),
4658                                     BTRFS_NAME_LEN);
4659                 } else {
4660                         len = name_len;
4661                 }
4662                 (*size) += name_len;
4663
4664                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4665                 filetype = btrfs_dir_type(node, di);
4666
4667                 btrfs_init_path(&path);
4668                 btrfs_dir_item_key_to_cpu(node, di, &location);
4669
4670                 /* Ignore related ROOT_ITEM check */
4671                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4672                         goto next;
4673
4674                 /* Check relative INODE_ITEM(existence/filetype) */
4675                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4676                 if (ret) {
4677                         err |= INODE_ITEM_MISSING;
4678                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4679                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4680                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4681                               key->offset, location.objectid, name_len,
4682                               namebuf, filetype);
4683                         goto next;
4684                 }
4685
4686                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4687                                     struct btrfs_inode_item);
4688                 mode = btrfs_inode_mode(path.nodes[0], ii);
4689
4690                 if (imode_to_type(mode) != filetype) {
4691                         err |= INODE_ITEM_MISMATCH;
4692                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4693                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4694                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4695                               key->offset, name_len, namebuf, filetype);
4696                 }
4697
4698                 /* Check relative INODE_REF/INODE_EXTREF */
4699                 location.type = BTRFS_INODE_REF_KEY;
4700                 location.offset = key->objectid;
4701                 ret = find_inode_ref(root, &location, namebuf, len,
4702                                        index, ext_ref);
4703                 err |= ret;
4704                 if (ret & INODE_REF_MISSING)
4705                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4706                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4707                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4708                               key->offset, name_len, namebuf, filetype);
4709
4710 next:
4711                 btrfs_release_path(&path);
4712                 len = sizeof(*di) + name_len + data_len;
4713                 di = (struct btrfs_dir_item *)((char *)di + len);
4714                 cur += len;
4715
4716                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4717                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4718                               root->objectid, key->objectid, key->offset);
4719                         break;
4720                 }
4721         }
4722
4723         return err;
4724 }
4725
4726 /*
4727  * Check file extent datasum/hole, update the size of the file extents,
4728  * check and update the last offset of the file extent.
4729  *
4730  * @root:       the root of fs/file tree.
4731  * @fkey:       the key of the file extent.
4732  * @nodatasum:  INODE_NODATASUM feature.
4733  * @size:       the sum of all EXTENT_DATA items size for this inode.
4734  * @end:        the offset of the last extent.
4735  *
4736  * Return 0 if no error occurred.
4737  */
4738 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4739                              struct extent_buffer *node, int slot,
4740                              unsigned int nodatasum, u64 *size, u64 *end)
4741 {
4742         struct btrfs_file_extent_item *fi;
4743         u64 disk_bytenr;
4744         u64 disk_num_bytes;
4745         u64 extent_num_bytes;
4746         u64 extent_offset;
4747         u64 csum_found;         /* In byte size, sectorsize aligned */
4748         u64 search_start;       /* Logical range start we search for csum */
4749         u64 search_len;         /* Logical range len we search for csum */
4750         unsigned int extent_type;
4751         unsigned int is_hole;
4752         int compressed = 0;
4753         int ret;
4754         int err = 0;
4755
4756         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4757
4758         /* Check inline extent */
4759         extent_type = btrfs_file_extent_type(node, fi);
4760         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4761                 struct btrfs_item *e = btrfs_item_nr(slot);
4762                 u32 item_inline_len;
4763
4764                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4765                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4766                 compressed = btrfs_file_extent_compression(node, fi);
4767                 if (extent_num_bytes == 0) {
4768                         error(
4769                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4770                                 root->objectid, fkey->objectid, fkey->offset);
4771                         err |= FILE_EXTENT_ERROR;
4772                 }
4773                 if (!compressed && extent_num_bytes != item_inline_len) {
4774                         error(
4775                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4776                                 root->objectid, fkey->objectid, fkey->offset,
4777                                 extent_num_bytes, item_inline_len);
4778                         err |= FILE_EXTENT_ERROR;
4779                 }
4780                 *end += extent_num_bytes;
4781                 *size += extent_num_bytes;
4782                 return err;
4783         }
4784
4785         /* Check extent type */
4786         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4787                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4788                 err |= FILE_EXTENT_ERROR;
4789                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4790                       root->objectid, fkey->objectid, fkey->offset);
4791                 return err;
4792         }
4793
4794         /* Check REG_EXTENT/PREALLOC_EXTENT */
4795         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4796         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4797         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4798         extent_offset = btrfs_file_extent_offset(node, fi);
4799         compressed = btrfs_file_extent_compression(node, fi);
4800         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4801
4802         /*
4803          * Check EXTENT_DATA csum
4804          *
4805          * For plain (uncompressed) extent, we should only check the range
4806          * we're referring to, as it's possible that part of prealloc extent
4807          * has been written, and has csum:
4808          *
4809          * |<--- Original large preallocated extent A ---->|
4810          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4811          *      No csum                         Has csum
4812          *
4813          * For compressed extent, we should check the whole range.
4814          */
4815         if (!compressed) {
4816                 search_start = disk_bytenr + extent_offset;
4817                 search_len = extent_num_bytes;
4818         } else {
4819                 search_start = disk_bytenr;
4820                 search_len = disk_num_bytes;
4821         }
4822         ret = count_csum_range(root, search_start, search_len, &csum_found);
4823         if (csum_found > 0 && nodatasum) {
4824                 err |= ODD_CSUM_ITEM;
4825                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4826                       root->objectid, fkey->objectid, fkey->offset);
4827         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4828                    !is_hole && (ret < 0 || csum_found < search_len)) {
4829                 err |= CSUM_ITEM_MISSING;
4830                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4831                       root->objectid, fkey->objectid, fkey->offset,
4832                       csum_found, search_len);
4833         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4834                 err |= ODD_CSUM_ITEM;
4835                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4836                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4837         }
4838
4839         /* Check EXTENT_DATA hole */
4840         if (!no_holes && *end != fkey->offset) {
4841                 err |= FILE_EXTENT_ERROR;
4842                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4843                       root->objectid, fkey->objectid, fkey->offset);
4844         }
4845
4846         *end += extent_num_bytes;
4847         if (!is_hole)
4848                 *size += extent_num_bytes;
4849
4850         return err;
4851 }
4852
4853 /*
4854  * Check INODE_ITEM and related ITEMs (the same inode number)
4855  * 1. check link count
4856  * 2. check inode ref/extref
4857  * 3. check dir item/index
4858  *
4859  * @ext_ref:    the EXTENDED_IREF feature
4860  *
4861  * Return 0 if no error occurred.
4862  * Return >0 for error or hit the traversal is done(by error bitmap)
4863  */
4864 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4865                             unsigned int ext_ref)
4866 {
4867         struct extent_buffer *node;
4868         struct btrfs_inode_item *ii;
4869         struct btrfs_key key;
4870         u64 inode_id;
4871         u32 mode;
4872         u64 nlink;
4873         u64 nbytes;
4874         u64 isize;
4875         u64 size = 0;
4876         u64 refs = 0;
4877         u64 extent_end = 0;
4878         u64 extent_size = 0;
4879         unsigned int dir;
4880         unsigned int nodatasum;
4881         int slot;
4882         int ret;
4883         int err = 0;
4884
4885         node = path->nodes[0];
4886         slot = path->slots[0];
4887
4888         btrfs_item_key_to_cpu(node, &key, slot);
4889         inode_id = key.objectid;
4890
4891         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4892                 ret = btrfs_next_item(root, path);
4893                 if (ret > 0)
4894                         err |= LAST_ITEM;
4895                 return err;
4896         }
4897
4898         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4899         isize = btrfs_inode_size(node, ii);
4900         nbytes = btrfs_inode_nbytes(node, ii);
4901         mode = btrfs_inode_mode(node, ii);
4902         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4903         nlink = btrfs_inode_nlink(node, ii);
4904         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4905
4906         while (1) {
4907                 ret = btrfs_next_item(root, path);
4908                 if (ret < 0) {
4909                         /* out will fill 'err' rusing current statistics */
4910                         goto out;
4911                 } else if (ret > 0) {
4912                         err |= LAST_ITEM;
4913                         goto out;
4914                 }
4915
4916                 node = path->nodes[0];
4917                 slot = path->slots[0];
4918                 btrfs_item_key_to_cpu(node, &key, slot);
4919                 if (key.objectid != inode_id)
4920                         goto out;
4921
4922                 switch (key.type) {
4923                 case BTRFS_INODE_REF_KEY:
4924                         ret = check_inode_ref(root, &key, node, slot, &refs,
4925                                               mode);
4926                         err |= ret;
4927                         break;
4928                 case BTRFS_INODE_EXTREF_KEY:
4929                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4930                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4931                                         root->objectid, key.objectid,
4932                                         key.offset);
4933                         ret = check_inode_extref(root, &key, node, slot, &refs,
4934                                                  mode);
4935                         err |= ret;
4936                         break;
4937                 case BTRFS_DIR_ITEM_KEY:
4938                 case BTRFS_DIR_INDEX_KEY:
4939                         if (!dir) {
4940                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4941                                         root->objectid, inode_id,
4942                                         imode_to_type(mode), key.objectid,
4943                                         key.offset);
4944                         }
4945                         ret = check_dir_item(root, &key, node, slot, &size,
4946                                              ext_ref);
4947                         err |= ret;
4948                         break;
4949                 case BTRFS_EXTENT_DATA_KEY:
4950                         if (dir) {
4951                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4952                                         root->objectid, inode_id, key.objectid,
4953                                         key.offset);
4954                         }
4955                         ret = check_file_extent(root, &key, node, slot,
4956                                                 nodatasum, &extent_size,
4957                                                 &extent_end);
4958                         err |= ret;
4959                         break;
4960                 case BTRFS_XATTR_ITEM_KEY:
4961                         break;
4962                 default:
4963                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4964                               key.objectid, key.type, key.offset);
4965                 }
4966         }
4967
4968 out:
4969         /* verify INODE_ITEM nlink/isize/nbytes */
4970         if (dir) {
4971                 if (nlink != 1) {
4972                         err |= LINK_COUNT_ERROR;
4973                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4974                               root->objectid, inode_id, nlink);
4975                 }
4976
4977                 /*
4978                  * Just a warning, as dir inode nbytes is just an
4979                  * instructive value.
4980                  */
4981                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4982                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4983                                 root->objectid, inode_id, root->nodesize);
4984                 }
4985
4986                 if (isize != size) {
4987                         err |= ISIZE_ERROR;
4988                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4989                               root->objectid, inode_id, isize, size);
4990                 }
4991         } else {
4992                 if (nlink != refs) {
4993                         err |= LINK_COUNT_ERROR;
4994                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4995                               root->objectid, inode_id, nlink, refs);
4996                 } else if (!nlink) {
4997                         err |= ORPHAN_ITEM;
4998                 }
4999
5000                 if (!nbytes && !no_holes && extent_end < isize) {
5001                         err |= NBYTES_ERROR;
5002                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5003                               root->objectid, inode_id, isize);
5004                 }
5005
5006                 if (nbytes != extent_size) {
5007                         err |= NBYTES_ERROR;
5008                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5009                               root->objectid, inode_id, nbytes, extent_size);
5010                 }
5011         }
5012
5013         return err;
5014 }
5015
5016 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5017 {
5018         struct btrfs_path path;
5019         struct btrfs_key key;
5020         int err = 0;
5021         int ret;
5022
5023         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5024         key.type = BTRFS_INODE_ITEM_KEY;
5025         key.offset = 0;
5026
5027         /* For root being dropped, we don't need to check first inode */
5028         if (btrfs_root_refs(&root->root_item) == 0 &&
5029             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5030             key.objectid)
5031                 return 0;
5032
5033         btrfs_init_path(&path);
5034
5035         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5036         if (ret < 0)
5037                 goto out;
5038         if (ret > 0) {
5039                 ret = 0;
5040                 err |= INODE_ITEM_MISSING;
5041                 error("first inode item of root %llu is missing",
5042                       root->objectid);
5043         }
5044
5045         err |= check_inode_item(root, &path, ext_ref);
5046         err &= ~LAST_ITEM;
5047         if (err && !ret)
5048                 ret = -EIO;
5049 out:
5050         btrfs_release_path(&path);
5051         return ret;
5052 }
5053
5054 /*
5055  * Iterate all item on the tree and call check_inode_item() to check.
5056  *
5057  * @root:       the root of the tree to be checked.
5058  * @ext_ref:    the EXTENDED_IREF feature
5059  *
5060  * Return 0 if no error found.
5061  * Return <0 for error.
5062  */
5063 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5064 {
5065         struct btrfs_path path;
5066         struct node_refs nrefs;
5067         struct btrfs_root_item *root_item = &root->root_item;
5068         int ret;
5069         int level;
5070         int err = 0;
5071
5072         /*
5073          * We need to manually check the first inode item(256)
5074          * As the following traversal function will only start from
5075          * the first inode item in the leaf, if inode item(256) is missing
5076          * we will just skip it forever.
5077          */
5078         ret = check_fs_first_inode(root, ext_ref);
5079         if (ret < 0)
5080                 return ret;
5081
5082         memset(&nrefs, 0, sizeof(nrefs));
5083         level = btrfs_header_level(root->node);
5084         btrfs_init_path(&path);
5085
5086         if (btrfs_root_refs(root_item) > 0 ||
5087             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5088                 path.nodes[level] = root->node;
5089                 path.slots[level] = 0;
5090                 extent_buffer_get(root->node);
5091         } else {
5092                 struct btrfs_key key;
5093
5094                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5095                 level = root_item->drop_level;
5096                 path.lowest_level = level;
5097                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5098                 if (ret < 0)
5099                         goto out;
5100                 ret = 0;
5101         }
5102
5103         while (1) {
5104                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5105                 err |= !!ret;
5106
5107                 /* if ret is negative, walk shall stop */
5108                 if (ret < 0) {
5109                         ret = err;
5110                         break;
5111                 }
5112
5113                 ret = walk_up_tree_v2(root, &path, &level);
5114                 if (ret != 0) {
5115                         /* Normal exit, reset ret to err */
5116                         ret = err;
5117                         break;
5118                 }
5119         }
5120
5121 out:
5122         btrfs_release_path(&path);
5123         return ret;
5124 }
5125
5126 /*
5127  * Find the relative ref for root_ref and root_backref.
5128  *
5129  * @root:       the root of the root tree.
5130  * @ref_key:    the key of the root ref.
5131  *
5132  * Return 0 if no error occurred.
5133  */
5134 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5135                           struct extent_buffer *node, int slot)
5136 {
5137         struct btrfs_path path;
5138         struct btrfs_key key;
5139         struct btrfs_root_ref *ref;
5140         struct btrfs_root_ref *backref;
5141         char ref_name[BTRFS_NAME_LEN] = {0};
5142         char backref_name[BTRFS_NAME_LEN] = {0};
5143         u64 ref_dirid;
5144         u64 ref_seq;
5145         u32 ref_namelen;
5146         u64 backref_dirid;
5147         u64 backref_seq;
5148         u32 backref_namelen;
5149         u32 len;
5150         int ret;
5151         int err = 0;
5152
5153         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5154         ref_dirid = btrfs_root_ref_dirid(node, ref);
5155         ref_seq = btrfs_root_ref_sequence(node, ref);
5156         ref_namelen = btrfs_root_ref_name_len(node, ref);
5157
5158         if (ref_namelen <= BTRFS_NAME_LEN) {
5159                 len = ref_namelen;
5160         } else {
5161                 len = BTRFS_NAME_LEN;
5162                 warning("%s[%llu %llu] ref_name too long",
5163                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5164                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5165                         ref_key->offset);
5166         }
5167         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5168
5169         /* Find relative root_ref */
5170         key.objectid = ref_key->offset;
5171         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5172         key.offset = ref_key->objectid;
5173
5174         btrfs_init_path(&path);
5175         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5176         if (ret) {
5177                 err |= ROOT_REF_MISSING;
5178                 error("%s[%llu %llu] couldn't find relative ref",
5179                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5180                       "ROOT_REF" : "ROOT_BACKREF",
5181                       ref_key->objectid, ref_key->offset);
5182                 goto out;
5183         }
5184
5185         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5186                                  struct btrfs_root_ref);
5187         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5188         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5189         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5190
5191         if (backref_namelen <= BTRFS_NAME_LEN) {
5192                 len = backref_namelen;
5193         } else {
5194                 len = BTRFS_NAME_LEN;
5195                 warning("%s[%llu %llu] ref_name too long",
5196                         key.type == BTRFS_ROOT_REF_KEY ?
5197                         "ROOT_REF" : "ROOT_BACKREF",
5198                         key.objectid, key.offset);
5199         }
5200         read_extent_buffer(path.nodes[0], backref_name,
5201                            (unsigned long)(backref + 1), len);
5202
5203         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5204             ref_namelen != backref_namelen ||
5205             strncmp(ref_name, backref_name, len)) {
5206                 err |= ROOT_REF_MISMATCH;
5207                 error("%s[%llu %llu] mismatch relative ref",
5208                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5209                       "ROOT_REF" : "ROOT_BACKREF",
5210                       ref_key->objectid, ref_key->offset);
5211         }
5212 out:
5213         btrfs_release_path(&path);
5214         return err;
5215 }
5216
5217 /*
5218  * Check all fs/file tree in low_memory mode.
5219  *
5220  * 1. for fs tree root item, call check_fs_root_v2()
5221  * 2. for fs tree root ref/backref, call check_root_ref()
5222  *
5223  * Return 0 if no error occurred.
5224  */
5225 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5226 {
5227         struct btrfs_root *tree_root = fs_info->tree_root;
5228         struct btrfs_root *cur_root = NULL;
5229         struct btrfs_path path;
5230         struct btrfs_key key;
5231         struct extent_buffer *node;
5232         unsigned int ext_ref;
5233         int slot;
5234         int ret;
5235         int err = 0;
5236
5237         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5238
5239         btrfs_init_path(&path);
5240         key.objectid = BTRFS_FS_TREE_OBJECTID;
5241         key.offset = 0;
5242         key.type = BTRFS_ROOT_ITEM_KEY;
5243
5244         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5245         if (ret < 0) {
5246                 err = ret;
5247                 goto out;
5248         } else if (ret > 0) {
5249                 err = -ENOENT;
5250                 goto out;
5251         }
5252
5253         while (1) {
5254                 node = path.nodes[0];
5255                 slot = path.slots[0];
5256                 btrfs_item_key_to_cpu(node, &key, slot);
5257                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5258                         goto out;
5259                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5260                     fs_root_objectid(key.objectid)) {
5261                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5262                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5263                                                                        &key);
5264                         } else {
5265                                 key.offset = (u64)-1;
5266                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5267                         }
5268
5269                         if (IS_ERR(cur_root)) {
5270                                 error("Fail to read fs/subvol tree: %lld",
5271                                       key.objectid);
5272                                 err = -EIO;
5273                                 goto next;
5274                         }
5275
5276                         ret = check_fs_root_v2(cur_root, ext_ref);
5277                         err |= ret;
5278
5279                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5280                                 btrfs_free_fs_root(cur_root);
5281                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5282                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5283                         ret = check_root_ref(tree_root, &key, node, slot);
5284                         err |= ret;
5285                 }
5286 next:
5287                 ret = btrfs_next_item(tree_root, &path);
5288                 if (ret > 0)
5289                         goto out;
5290                 if (ret < 0) {
5291                         err = ret;
5292                         goto out;
5293                 }
5294         }
5295
5296 out:
5297         btrfs_release_path(&path);
5298         return err;
5299 }
5300
5301 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5302 {
5303         struct list_head *cur = rec->backrefs.next;
5304         struct extent_backref *back;
5305         struct tree_backref *tback;
5306         struct data_backref *dback;
5307         u64 found = 0;
5308         int err = 0;
5309
5310         while(cur != &rec->backrefs) {
5311                 back = to_extent_backref(cur);
5312                 cur = cur->next;
5313                 if (!back->found_extent_tree) {
5314                         err = 1;
5315                         if (!print_errs)
5316                                 goto out;
5317                         if (back->is_data) {
5318                                 dback = to_data_backref(back);
5319                                 fprintf(stderr, "Backref %llu %s %llu"
5320                                         " owner %llu offset %llu num_refs %lu"
5321                                         " not found in extent tree\n",
5322                                         (unsigned long long)rec->start,
5323                                         back->full_backref ?
5324                                         "parent" : "root",
5325                                         back->full_backref ?
5326                                         (unsigned long long)dback->parent:
5327                                         (unsigned long long)dback->root,
5328                                         (unsigned long long)dback->owner,
5329                                         (unsigned long long)dback->offset,
5330                                         (unsigned long)dback->num_refs);
5331                         } else {
5332                                 tback = to_tree_backref(back);
5333                                 fprintf(stderr, "Backref %llu parent %llu"
5334                                         " root %llu not found in extent tree\n",
5335                                         (unsigned long long)rec->start,
5336                                         (unsigned long long)tback->parent,
5337                                         (unsigned long long)tback->root);
5338                         }
5339                 }
5340                 if (!back->is_data && !back->found_ref) {
5341                         err = 1;
5342                         if (!print_errs)
5343                                 goto out;
5344                         tback = to_tree_backref(back);
5345                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5346                                 (unsigned long long)rec->start,
5347                                 back->full_backref ? "parent" : "root",
5348                                 back->full_backref ?
5349                                 (unsigned long long)tback->parent :
5350                                 (unsigned long long)tback->root, back);
5351                 }
5352                 if (back->is_data) {
5353                         dback = to_data_backref(back);
5354                         if (dback->found_ref != dback->num_refs) {
5355                                 err = 1;
5356                                 if (!print_errs)
5357                                         goto out;
5358                                 fprintf(stderr, "Incorrect local backref count"
5359                                         " on %llu %s %llu owner %llu"
5360                                         " offset %llu found %u wanted %u back %p\n",
5361                                         (unsigned long long)rec->start,
5362                                         back->full_backref ?
5363                                         "parent" : "root",
5364                                         back->full_backref ?
5365                                         (unsigned long long)dback->parent:
5366                                         (unsigned long long)dback->root,
5367                                         (unsigned long long)dback->owner,
5368                                         (unsigned long long)dback->offset,
5369                                         dback->found_ref, dback->num_refs, back);
5370                         }
5371                         if (dback->disk_bytenr != rec->start) {
5372                                 err = 1;
5373                                 if (!print_errs)
5374                                         goto out;
5375                                 fprintf(stderr, "Backref disk bytenr does not"
5376                                         " match extent record, bytenr=%llu, "
5377                                         "ref bytenr=%llu\n",
5378                                         (unsigned long long)rec->start,
5379                                         (unsigned long long)dback->disk_bytenr);
5380                         }
5381
5382                         if (dback->bytes != rec->nr) {
5383                                 err = 1;
5384                                 if (!print_errs)
5385                                         goto out;
5386                                 fprintf(stderr, "Backref bytes do not match "
5387                                         "extent backref, bytenr=%llu, ref "
5388                                         "bytes=%llu, backref bytes=%llu\n",
5389                                         (unsigned long long)rec->start,
5390                                         (unsigned long long)rec->nr,
5391                                         (unsigned long long)dback->bytes);
5392                         }
5393                 }
5394                 if (!back->is_data) {
5395                         found += 1;
5396                 } else {
5397                         dback = to_data_backref(back);
5398                         found += dback->found_ref;
5399                 }
5400         }
5401         if (found != rec->refs) {
5402                 err = 1;
5403                 if (!print_errs)
5404                         goto out;
5405                 fprintf(stderr, "Incorrect global backref count "
5406                         "on %llu found %llu wanted %llu\n",
5407                         (unsigned long long)rec->start,
5408                         (unsigned long long)found,
5409                         (unsigned long long)rec->refs);
5410         }
5411 out:
5412         return err;
5413 }
5414
5415 static int free_all_extent_backrefs(struct extent_record *rec)
5416 {
5417         struct extent_backref *back;
5418         struct list_head *cur;
5419         while (!list_empty(&rec->backrefs)) {
5420                 cur = rec->backrefs.next;
5421                 back = to_extent_backref(cur);
5422                 list_del(cur);
5423                 free(back);
5424         }
5425         return 0;
5426 }
5427
5428 static void free_extent_record_cache(struct cache_tree *extent_cache)
5429 {
5430         struct cache_extent *cache;
5431         struct extent_record *rec;
5432
5433         while (1) {
5434                 cache = first_cache_extent(extent_cache);
5435                 if (!cache)
5436                         break;
5437                 rec = container_of(cache, struct extent_record, cache);
5438                 remove_cache_extent(extent_cache, cache);
5439                 free_all_extent_backrefs(rec);
5440                 free(rec);
5441         }
5442 }
5443
5444 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5445                                  struct extent_record *rec)
5446 {
5447         if (rec->content_checked && rec->owner_ref_checked &&
5448             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5449             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5450             !rec->bad_full_backref && !rec->crossing_stripes &&
5451             !rec->wrong_chunk_type) {
5452                 remove_cache_extent(extent_cache, &rec->cache);
5453                 free_all_extent_backrefs(rec);
5454                 list_del_init(&rec->list);
5455                 free(rec);
5456         }
5457         return 0;
5458 }
5459
5460 static int check_owner_ref(struct btrfs_root *root,
5461                             struct extent_record *rec,
5462                             struct extent_buffer *buf)
5463 {
5464         struct extent_backref *node;
5465         struct tree_backref *back;
5466         struct btrfs_root *ref_root;
5467         struct btrfs_key key;
5468         struct btrfs_path path;
5469         struct extent_buffer *parent;
5470         int level;
5471         int found = 0;
5472         int ret;
5473
5474         list_for_each_entry(node, &rec->backrefs, list) {
5475                 if (node->is_data)
5476                         continue;
5477                 if (!node->found_ref)
5478                         continue;
5479                 if (node->full_backref)
5480                         continue;
5481                 back = to_tree_backref(node);
5482                 if (btrfs_header_owner(buf) == back->root)
5483                         return 0;
5484         }
5485         BUG_ON(rec->is_root);
5486
5487         /* try to find the block by search corresponding fs tree */
5488         key.objectid = btrfs_header_owner(buf);
5489         key.type = BTRFS_ROOT_ITEM_KEY;
5490         key.offset = (u64)-1;
5491
5492         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5493         if (IS_ERR(ref_root))
5494                 return 1;
5495
5496         level = btrfs_header_level(buf);
5497         if (level == 0)
5498                 btrfs_item_key_to_cpu(buf, &key, 0);
5499         else
5500                 btrfs_node_key_to_cpu(buf, &key, 0);
5501
5502         btrfs_init_path(&path);
5503         path.lowest_level = level + 1;
5504         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5505         if (ret < 0)
5506                 return 0;
5507
5508         parent = path.nodes[level + 1];
5509         if (parent && buf->start == btrfs_node_blockptr(parent,
5510                                                         path.slots[level + 1]))
5511                 found = 1;
5512
5513         btrfs_release_path(&path);
5514         return found ? 0 : 1;
5515 }
5516
5517 static int is_extent_tree_record(struct extent_record *rec)
5518 {
5519         struct list_head *cur = rec->backrefs.next;
5520         struct extent_backref *node;
5521         struct tree_backref *back;
5522         int is_extent = 0;
5523
5524         while(cur != &rec->backrefs) {
5525                 node = to_extent_backref(cur);
5526                 cur = cur->next;
5527                 if (node->is_data)
5528                         return 0;
5529                 back = to_tree_backref(node);
5530                 if (node->full_backref)
5531                         return 0;
5532                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5533                         is_extent = 1;
5534         }
5535         return is_extent;
5536 }
5537
5538
5539 static int record_bad_block_io(struct btrfs_fs_info *info,
5540                                struct cache_tree *extent_cache,
5541                                u64 start, u64 len)
5542 {
5543         struct extent_record *rec;
5544         struct cache_extent *cache;
5545         struct btrfs_key key;
5546
5547         cache = lookup_cache_extent(extent_cache, start, len);
5548         if (!cache)
5549                 return 0;
5550
5551         rec = container_of(cache, struct extent_record, cache);
5552         if (!is_extent_tree_record(rec))
5553                 return 0;
5554
5555         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5556         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5557 }
5558
5559 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5560                        struct extent_buffer *buf, int slot)
5561 {
5562         if (btrfs_header_level(buf)) {
5563                 struct btrfs_key_ptr ptr1, ptr2;
5564
5565                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5566                                    sizeof(struct btrfs_key_ptr));
5567                 read_extent_buffer(buf, &ptr2,
5568                                    btrfs_node_key_ptr_offset(slot + 1),
5569                                    sizeof(struct btrfs_key_ptr));
5570                 write_extent_buffer(buf, &ptr1,
5571                                     btrfs_node_key_ptr_offset(slot + 1),
5572                                     sizeof(struct btrfs_key_ptr));
5573                 write_extent_buffer(buf, &ptr2,
5574                                     btrfs_node_key_ptr_offset(slot),
5575                                     sizeof(struct btrfs_key_ptr));
5576                 if (slot == 0) {
5577                         struct btrfs_disk_key key;
5578                         btrfs_node_key(buf, &key, 0);
5579                         btrfs_fixup_low_keys(root, path, &key,
5580                                              btrfs_header_level(buf) + 1);
5581                 }
5582         } else {
5583                 struct btrfs_item *item1, *item2;
5584                 struct btrfs_key k1, k2;
5585                 char *item1_data, *item2_data;
5586                 u32 item1_offset, item2_offset, item1_size, item2_size;
5587
5588                 item1 = btrfs_item_nr(slot);
5589                 item2 = btrfs_item_nr(slot + 1);
5590                 btrfs_item_key_to_cpu(buf, &k1, slot);
5591                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5592                 item1_offset = btrfs_item_offset(buf, item1);
5593                 item2_offset = btrfs_item_offset(buf, item2);
5594                 item1_size = btrfs_item_size(buf, item1);
5595                 item2_size = btrfs_item_size(buf, item2);
5596
5597                 item1_data = malloc(item1_size);
5598                 if (!item1_data)
5599                         return -ENOMEM;
5600                 item2_data = malloc(item2_size);
5601                 if (!item2_data) {
5602                         free(item1_data);
5603                         return -ENOMEM;
5604                 }
5605
5606                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5607                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5608
5609                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5610                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5611                 free(item1_data);
5612                 free(item2_data);
5613
5614                 btrfs_set_item_offset(buf, item1, item2_offset);
5615                 btrfs_set_item_offset(buf, item2, item1_offset);
5616                 btrfs_set_item_size(buf, item1, item2_size);
5617                 btrfs_set_item_size(buf, item2, item1_size);
5618
5619                 path->slots[0] = slot;
5620                 btrfs_set_item_key_unsafe(root, path, &k2);
5621                 path->slots[0] = slot + 1;
5622                 btrfs_set_item_key_unsafe(root, path, &k1);
5623         }
5624         return 0;
5625 }
5626
5627 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5628 {
5629         struct extent_buffer *buf;
5630         struct btrfs_key k1, k2;
5631         int i;
5632         int level = path->lowest_level;
5633         int ret = -EIO;
5634
5635         buf = path->nodes[level];
5636         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5637                 if (level) {
5638                         btrfs_node_key_to_cpu(buf, &k1, i);
5639                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5640                 } else {
5641                         btrfs_item_key_to_cpu(buf, &k1, i);
5642                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5643                 }
5644                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5645                         continue;
5646                 ret = swap_values(root, path, buf, i);
5647                 if (ret)
5648                         break;
5649                 btrfs_mark_buffer_dirty(buf);
5650                 i = 0;
5651         }
5652         return ret;
5653 }
5654
5655 static int delete_bogus_item(struct btrfs_root *root,
5656                              struct btrfs_path *path,
5657                              struct extent_buffer *buf, int slot)
5658 {
5659         struct btrfs_key key;
5660         int nritems = btrfs_header_nritems(buf);
5661
5662         btrfs_item_key_to_cpu(buf, &key, slot);
5663
5664         /* These are all the keys we can deal with missing. */
5665         if (key.type != BTRFS_DIR_INDEX_KEY &&
5666             key.type != BTRFS_EXTENT_ITEM_KEY &&
5667             key.type != BTRFS_METADATA_ITEM_KEY &&
5668             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5669             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5670                 return -1;
5671
5672         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5673                (unsigned long long)key.objectid, key.type,
5674                (unsigned long long)key.offset, slot, buf->start);
5675         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5676                               btrfs_item_nr_offset(slot + 1),
5677                               sizeof(struct btrfs_item) *
5678                               (nritems - slot - 1));
5679         btrfs_set_header_nritems(buf, nritems - 1);
5680         if (slot == 0) {
5681                 struct btrfs_disk_key disk_key;
5682
5683                 btrfs_item_key(buf, &disk_key, 0);
5684                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5685         }
5686         btrfs_mark_buffer_dirty(buf);
5687         return 0;
5688 }
5689
5690 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5691 {
5692         struct extent_buffer *buf;
5693         int i;
5694         int ret = 0;
5695
5696         /* We should only get this for leaves */
5697         BUG_ON(path->lowest_level);
5698         buf = path->nodes[0];
5699 again:
5700         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5701                 unsigned int shift = 0, offset;
5702
5703                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5704                     BTRFS_LEAF_DATA_SIZE(root)) {
5705                         if (btrfs_item_end_nr(buf, i) >
5706                             BTRFS_LEAF_DATA_SIZE(root)) {
5707                                 ret = delete_bogus_item(root, path, buf, i);
5708                                 if (!ret)
5709                                         goto again;
5710                                 fprintf(stderr, "item is off the end of the "
5711                                         "leaf, can't fix\n");
5712                                 ret = -EIO;
5713                                 break;
5714                         }
5715                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5716                                 btrfs_item_end_nr(buf, i);
5717                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5718                            btrfs_item_offset_nr(buf, i - 1)) {
5719                         if (btrfs_item_end_nr(buf, i) >
5720                             btrfs_item_offset_nr(buf, i - 1)) {
5721                                 ret = delete_bogus_item(root, path, buf, i);
5722                                 if (!ret)
5723                                         goto again;
5724                                 fprintf(stderr, "items overlap, can't fix\n");
5725                                 ret = -EIO;
5726                                 break;
5727                         }
5728                         shift = btrfs_item_offset_nr(buf, i - 1) -
5729                                 btrfs_item_end_nr(buf, i);
5730                 }
5731                 if (!shift)
5732                         continue;
5733
5734                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5735                        i, shift, (unsigned long long)buf->start);
5736                 offset = btrfs_item_offset_nr(buf, i);
5737                 memmove_extent_buffer(buf,
5738                                       btrfs_leaf_data(buf) + offset + shift,
5739                                       btrfs_leaf_data(buf) + offset,
5740                                       btrfs_item_size_nr(buf, i));
5741                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5742                                       offset + shift);
5743                 btrfs_mark_buffer_dirty(buf);
5744         }
5745
5746         /*
5747          * We may have moved things, in which case we want to exit so we don't
5748          * write those changes out.  Once we have proper abort functionality in
5749          * progs this can be changed to something nicer.
5750          */
5751         BUG_ON(ret);
5752         return ret;
5753 }
5754
5755 /*
5756  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5757  * then just return -EIO.
5758  */
5759 static int try_to_fix_bad_block(struct btrfs_root *root,
5760                                 struct extent_buffer *buf,
5761                                 enum btrfs_tree_block_status status)
5762 {
5763         struct btrfs_trans_handle *trans;
5764         struct ulist *roots;
5765         struct ulist_node *node;
5766         struct btrfs_root *search_root;
5767         struct btrfs_path path;
5768         struct ulist_iterator iter;
5769         struct btrfs_key root_key, key;
5770         int ret;
5771
5772         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5773             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5774                 return -EIO;
5775
5776         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5777         if (ret)
5778                 return -EIO;
5779
5780         btrfs_init_path(&path);
5781         ULIST_ITER_INIT(&iter);
5782         while ((node = ulist_next(roots, &iter))) {
5783                 root_key.objectid = node->val;
5784                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5785                 root_key.offset = (u64)-1;
5786
5787                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5788                 if (IS_ERR(root)) {
5789                         ret = -EIO;
5790                         break;
5791                 }
5792
5793
5794                 trans = btrfs_start_transaction(search_root, 0);
5795                 if (IS_ERR(trans)) {
5796                         ret = PTR_ERR(trans);
5797                         break;
5798                 }
5799
5800                 path.lowest_level = btrfs_header_level(buf);
5801                 path.skip_check_block = 1;
5802                 if (path.lowest_level)
5803                         btrfs_node_key_to_cpu(buf, &key, 0);
5804                 else
5805                         btrfs_item_key_to_cpu(buf, &key, 0);
5806                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5807                 if (ret) {
5808                         ret = -EIO;
5809                         btrfs_commit_transaction(trans, search_root);
5810                         break;
5811                 }
5812                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5813                         ret = fix_key_order(search_root, &path);
5814                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5815                         ret = fix_item_offset(search_root, &path);
5816                 if (ret) {
5817                         btrfs_commit_transaction(trans, search_root);
5818                         break;
5819                 }
5820                 btrfs_release_path(&path);
5821                 btrfs_commit_transaction(trans, search_root);
5822         }
5823         ulist_free(roots);
5824         btrfs_release_path(&path);
5825         return ret;
5826 }
5827
5828 static int check_block(struct btrfs_root *root,
5829                        struct cache_tree *extent_cache,
5830                        struct extent_buffer *buf, u64 flags)
5831 {
5832         struct extent_record *rec;
5833         struct cache_extent *cache;
5834         struct btrfs_key key;
5835         enum btrfs_tree_block_status status;
5836         int ret = 0;
5837         int level;
5838
5839         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5840         if (!cache)
5841                 return 1;
5842         rec = container_of(cache, struct extent_record, cache);
5843         rec->generation = btrfs_header_generation(buf);
5844
5845         level = btrfs_header_level(buf);
5846         if (btrfs_header_nritems(buf) > 0) {
5847
5848                 if (level == 0)
5849                         btrfs_item_key_to_cpu(buf, &key, 0);
5850                 else
5851                         btrfs_node_key_to_cpu(buf, &key, 0);
5852
5853                 rec->info_objectid = key.objectid;
5854         }
5855         rec->info_level = level;
5856
5857         if (btrfs_is_leaf(buf))
5858                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5859         else
5860                 status = btrfs_check_node(root, &rec->parent_key, buf);
5861
5862         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5863                 if (repair)
5864                         status = try_to_fix_bad_block(root, buf, status);
5865                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5866                         ret = -EIO;
5867                         fprintf(stderr, "bad block %llu\n",
5868                                 (unsigned long long)buf->start);
5869                 } else {
5870                         /*
5871                          * Signal to callers we need to start the scan over
5872                          * again since we'll have cowed blocks.
5873                          */
5874                         ret = -EAGAIN;
5875                 }
5876         } else {
5877                 rec->content_checked = 1;
5878                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5879                         rec->owner_ref_checked = 1;
5880                 else {
5881                         ret = check_owner_ref(root, rec, buf);
5882                         if (!ret)
5883                                 rec->owner_ref_checked = 1;
5884                 }
5885         }
5886         if (!ret)
5887                 maybe_free_extent_rec(extent_cache, rec);
5888         return ret;
5889 }
5890
5891 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5892                                                 u64 parent, u64 root)
5893 {
5894         struct list_head *cur = rec->backrefs.next;
5895         struct extent_backref *node;
5896         struct tree_backref *back;
5897
5898         while(cur != &rec->backrefs) {
5899                 node = to_extent_backref(cur);
5900                 cur = cur->next;
5901                 if (node->is_data)
5902                         continue;
5903                 back = to_tree_backref(node);
5904                 if (parent > 0) {
5905                         if (!node->full_backref)
5906                                 continue;
5907                         if (parent == back->parent)
5908                                 return back;
5909                 } else {
5910                         if (node->full_backref)
5911                                 continue;
5912                         if (back->root == root)
5913                                 return back;
5914                 }
5915         }
5916         return NULL;
5917 }
5918
5919 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5920                                                 u64 parent, u64 root)
5921 {
5922         struct tree_backref *ref = malloc(sizeof(*ref));
5923
5924         if (!ref)
5925                 return NULL;
5926         memset(&ref->node, 0, sizeof(ref->node));
5927         if (parent > 0) {
5928                 ref->parent = parent;
5929                 ref->node.full_backref = 1;
5930         } else {
5931                 ref->root = root;
5932                 ref->node.full_backref = 0;
5933         }
5934         list_add_tail(&ref->node.list, &rec->backrefs);
5935
5936         return ref;
5937 }
5938
5939 static struct data_backref *find_data_backref(struct extent_record *rec,
5940                                                 u64 parent, u64 root,
5941                                                 u64 owner, u64 offset,
5942                                                 int found_ref,
5943                                                 u64 disk_bytenr, u64 bytes)
5944 {
5945         struct list_head *cur = rec->backrefs.next;
5946         struct extent_backref *node;
5947         struct data_backref *back;
5948
5949         while(cur != &rec->backrefs) {
5950                 node = to_extent_backref(cur);
5951                 cur = cur->next;
5952                 if (!node->is_data)
5953                         continue;
5954                 back = to_data_backref(node);
5955                 if (parent > 0) {
5956                         if (!node->full_backref)
5957                                 continue;
5958                         if (parent == back->parent)
5959                                 return back;
5960                 } else {
5961                         if (node->full_backref)
5962                                 continue;
5963                         if (back->root == root && back->owner == owner &&
5964                             back->offset == offset) {
5965                                 if (found_ref && node->found_ref &&
5966                                     (back->bytes != bytes ||
5967                                     back->disk_bytenr != disk_bytenr))
5968                                         continue;
5969                                 return back;
5970                         }
5971                 }
5972         }
5973         return NULL;
5974 }
5975
5976 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5977                                                 u64 parent, u64 root,
5978                                                 u64 owner, u64 offset,
5979                                                 u64 max_size)
5980 {
5981         struct data_backref *ref = malloc(sizeof(*ref));
5982
5983         if (!ref)
5984                 return NULL;
5985         memset(&ref->node, 0, sizeof(ref->node));
5986         ref->node.is_data = 1;
5987
5988         if (parent > 0) {
5989                 ref->parent = parent;
5990                 ref->owner = 0;
5991                 ref->offset = 0;
5992                 ref->node.full_backref = 1;
5993         } else {
5994                 ref->root = root;
5995                 ref->owner = owner;
5996                 ref->offset = offset;
5997                 ref->node.full_backref = 0;
5998         }
5999         ref->bytes = max_size;
6000         ref->found_ref = 0;
6001         ref->num_refs = 0;
6002         list_add_tail(&ref->node.list, &rec->backrefs);
6003         if (max_size > rec->max_size)
6004                 rec->max_size = max_size;
6005         return ref;
6006 }
6007
6008 /* Check if the type of extent matches with its chunk */
6009 static void check_extent_type(struct extent_record *rec)
6010 {
6011         struct btrfs_block_group_cache *bg_cache;
6012
6013         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6014         if (!bg_cache)
6015                 return;
6016
6017         /* data extent, check chunk directly*/
6018         if (!rec->metadata) {
6019                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6020                         rec->wrong_chunk_type = 1;
6021                 return;
6022         }
6023
6024         /* metadata extent, check the obvious case first */
6025         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6026                                  BTRFS_BLOCK_GROUP_METADATA))) {
6027                 rec->wrong_chunk_type = 1;
6028                 return;
6029         }
6030
6031         /*
6032          * Check SYSTEM extent, as it's also marked as metadata, we can only
6033          * make sure it's a SYSTEM extent by its backref
6034          */
6035         if (!list_empty(&rec->backrefs)) {
6036                 struct extent_backref *node;
6037                 struct tree_backref *tback;
6038                 u64 bg_type;
6039
6040                 node = to_extent_backref(rec->backrefs.next);
6041                 if (node->is_data) {
6042                         /* tree block shouldn't have data backref */
6043                         rec->wrong_chunk_type = 1;
6044                         return;
6045                 }
6046                 tback = container_of(node, struct tree_backref, node);
6047
6048                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6049                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6050                 else
6051                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6052                 if (!(bg_cache->flags & bg_type))
6053                         rec->wrong_chunk_type = 1;
6054         }
6055 }
6056
6057 /*
6058  * Allocate a new extent record, fill default values from @tmpl and insert int
6059  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6060  * the cache, otherwise it fails.
6061  */
6062 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6063                 struct extent_record *tmpl)
6064 {
6065         struct extent_record *rec;
6066         int ret = 0;
6067
6068         BUG_ON(tmpl->max_size == 0);
6069         rec = malloc(sizeof(*rec));
6070         if (!rec)
6071                 return -ENOMEM;
6072         rec->start = tmpl->start;
6073         rec->max_size = tmpl->max_size;
6074         rec->nr = max(tmpl->nr, tmpl->max_size);
6075         rec->found_rec = tmpl->found_rec;
6076         rec->content_checked = tmpl->content_checked;
6077         rec->owner_ref_checked = tmpl->owner_ref_checked;
6078         rec->num_duplicates = 0;
6079         rec->metadata = tmpl->metadata;
6080         rec->flag_block_full_backref = FLAG_UNSET;
6081         rec->bad_full_backref = 0;
6082         rec->crossing_stripes = 0;
6083         rec->wrong_chunk_type = 0;
6084         rec->is_root = tmpl->is_root;
6085         rec->refs = tmpl->refs;
6086         rec->extent_item_refs = tmpl->extent_item_refs;
6087         rec->parent_generation = tmpl->parent_generation;
6088         INIT_LIST_HEAD(&rec->backrefs);
6089         INIT_LIST_HEAD(&rec->dups);
6090         INIT_LIST_HEAD(&rec->list);
6091         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6092         rec->cache.start = tmpl->start;
6093         rec->cache.size = tmpl->nr;
6094         ret = insert_cache_extent(extent_cache, &rec->cache);
6095         if (ret) {
6096                 free(rec);
6097                 return ret;
6098         }
6099         bytes_used += rec->nr;
6100
6101         if (tmpl->metadata)
6102                 rec->crossing_stripes = check_crossing_stripes(global_info,
6103                                 rec->start, global_info->tree_root->nodesize);
6104         check_extent_type(rec);
6105         return ret;
6106 }
6107
6108 /*
6109  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6110  * some are hints:
6111  * - refs              - if found, increase refs
6112  * - is_root           - if found, set
6113  * - content_checked   - if found, set
6114  * - owner_ref_checked - if found, set
6115  *
6116  * If not found, create a new one, initialize and insert.
6117  */
6118 static int add_extent_rec(struct cache_tree *extent_cache,
6119                 struct extent_record *tmpl)
6120 {
6121         struct extent_record *rec;
6122         struct cache_extent *cache;
6123         int ret = 0;
6124         int dup = 0;
6125
6126         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6127         if (cache) {
6128                 rec = container_of(cache, struct extent_record, cache);
6129                 if (tmpl->refs)
6130                         rec->refs++;
6131                 if (rec->nr == 1)
6132                         rec->nr = max(tmpl->nr, tmpl->max_size);
6133
6134                 /*
6135                  * We need to make sure to reset nr to whatever the extent
6136                  * record says was the real size, this way we can compare it to
6137                  * the backrefs.
6138                  */
6139                 if (tmpl->found_rec) {
6140                         if (tmpl->start != rec->start || rec->found_rec) {
6141                                 struct extent_record *tmp;
6142
6143                                 dup = 1;
6144                                 if (list_empty(&rec->list))
6145                                         list_add_tail(&rec->list,
6146                                                       &duplicate_extents);
6147
6148                                 /*
6149                                  * We have to do this song and dance in case we
6150                                  * find an extent record that falls inside of
6151                                  * our current extent record but does not have
6152                                  * the same objectid.
6153                                  */
6154                                 tmp = malloc(sizeof(*tmp));
6155                                 if (!tmp)
6156                                         return -ENOMEM;
6157                                 tmp->start = tmpl->start;
6158                                 tmp->max_size = tmpl->max_size;
6159                                 tmp->nr = tmpl->nr;
6160                                 tmp->found_rec = 1;
6161                                 tmp->metadata = tmpl->metadata;
6162                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6163                                 INIT_LIST_HEAD(&tmp->list);
6164                                 list_add_tail(&tmp->list, &rec->dups);
6165                                 rec->num_duplicates++;
6166                         } else {
6167                                 rec->nr = tmpl->nr;
6168                                 rec->found_rec = 1;
6169                         }
6170                 }
6171
6172                 if (tmpl->extent_item_refs && !dup) {
6173                         if (rec->extent_item_refs) {
6174                                 fprintf(stderr, "block %llu rec "
6175                                         "extent_item_refs %llu, passed %llu\n",
6176                                         (unsigned long long)tmpl->start,
6177                                         (unsigned long long)
6178                                                         rec->extent_item_refs,
6179                                         (unsigned long long)tmpl->extent_item_refs);
6180                         }
6181                         rec->extent_item_refs = tmpl->extent_item_refs;
6182                 }
6183                 if (tmpl->is_root)
6184                         rec->is_root = 1;
6185                 if (tmpl->content_checked)
6186                         rec->content_checked = 1;
6187                 if (tmpl->owner_ref_checked)
6188                         rec->owner_ref_checked = 1;
6189                 memcpy(&rec->parent_key, &tmpl->parent_key,
6190                                 sizeof(tmpl->parent_key));
6191                 if (tmpl->parent_generation)
6192                         rec->parent_generation = tmpl->parent_generation;
6193                 if (rec->max_size < tmpl->max_size)
6194                         rec->max_size = tmpl->max_size;
6195
6196                 /*
6197                  * A metadata extent can't cross stripe_len boundary, otherwise
6198                  * kernel scrub won't be able to handle it.
6199                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6200                  * it.
6201                  */
6202                 if (tmpl->metadata)
6203                         rec->crossing_stripes = check_crossing_stripes(
6204                                         global_info, rec->start,
6205                                         global_info->tree_root->nodesize);
6206                 check_extent_type(rec);
6207                 maybe_free_extent_rec(extent_cache, rec);
6208                 return ret;
6209         }
6210
6211         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6212
6213         return ret;
6214 }
6215
6216 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6217                             u64 parent, u64 root, int found_ref)
6218 {
6219         struct extent_record *rec;
6220         struct tree_backref *back;
6221         struct cache_extent *cache;
6222         int ret;
6223
6224         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6225         if (!cache) {
6226                 struct extent_record tmpl;
6227
6228                 memset(&tmpl, 0, sizeof(tmpl));
6229                 tmpl.start = bytenr;
6230                 tmpl.nr = 1;
6231                 tmpl.metadata = 1;
6232                 tmpl.max_size = 1;
6233
6234                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6235                 if (ret)
6236                         return ret;
6237
6238                 /* really a bug in cache_extent implement now */
6239                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6240                 if (!cache)
6241                         return -ENOENT;
6242         }
6243
6244         rec = container_of(cache, struct extent_record, cache);
6245         if (rec->start != bytenr) {
6246                 /*
6247                  * Several cause, from unaligned bytenr to over lapping extents
6248                  */
6249                 return -EEXIST;
6250         }
6251
6252         back = find_tree_backref(rec, parent, root);
6253         if (!back) {
6254                 back = alloc_tree_backref(rec, parent, root);
6255                 if (!back)
6256                         return -ENOMEM;
6257         }
6258
6259         if (found_ref) {
6260                 if (back->node.found_ref) {
6261                         fprintf(stderr, "Extent back ref already exists "
6262                                 "for %llu parent %llu root %llu \n",
6263                                 (unsigned long long)bytenr,
6264                                 (unsigned long long)parent,
6265                                 (unsigned long long)root);
6266                 }
6267                 back->node.found_ref = 1;
6268         } else {
6269                 if (back->node.found_extent_tree) {
6270                         fprintf(stderr, "Extent back ref already exists "
6271                                 "for %llu parent %llu root %llu \n",
6272                                 (unsigned long long)bytenr,
6273                                 (unsigned long long)parent,
6274                                 (unsigned long long)root);
6275                 }
6276                 back->node.found_extent_tree = 1;
6277         }
6278         check_extent_type(rec);
6279         maybe_free_extent_rec(extent_cache, rec);
6280         return 0;
6281 }
6282
6283 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6284                             u64 parent, u64 root, u64 owner, u64 offset,
6285                             u32 num_refs, int found_ref, u64 max_size)
6286 {
6287         struct extent_record *rec;
6288         struct data_backref *back;
6289         struct cache_extent *cache;
6290         int ret;
6291
6292         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6293         if (!cache) {
6294                 struct extent_record tmpl;
6295
6296                 memset(&tmpl, 0, sizeof(tmpl));
6297                 tmpl.start = bytenr;
6298                 tmpl.nr = 1;
6299                 tmpl.max_size = max_size;
6300
6301                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6302                 if (ret)
6303                         return ret;
6304
6305                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6306                 if (!cache)
6307                         abort();
6308         }
6309
6310         rec = container_of(cache, struct extent_record, cache);
6311         if (rec->max_size < max_size)
6312                 rec->max_size = max_size;
6313
6314         /*
6315          * If found_ref is set then max_size is the real size and must match the
6316          * existing refs.  So if we have already found a ref then we need to
6317          * make sure that this ref matches the existing one, otherwise we need
6318          * to add a new backref so we can notice that the backrefs don't match
6319          * and we need to figure out who is telling the truth.  This is to
6320          * account for that awful fsync bug I introduced where we'd end up with
6321          * a btrfs_file_extent_item that would have its length include multiple
6322          * prealloc extents or point inside of a prealloc extent.
6323          */
6324         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6325                                  bytenr, max_size);
6326         if (!back) {
6327                 back = alloc_data_backref(rec, parent, root, owner, offset,
6328                                           max_size);
6329                 BUG_ON(!back);
6330         }
6331
6332         if (found_ref) {
6333                 BUG_ON(num_refs != 1);
6334                 if (back->node.found_ref)
6335                         BUG_ON(back->bytes != max_size);
6336                 back->node.found_ref = 1;
6337                 back->found_ref += 1;
6338                 back->bytes = max_size;
6339                 back->disk_bytenr = bytenr;
6340                 rec->refs += 1;
6341                 rec->content_checked = 1;
6342                 rec->owner_ref_checked = 1;
6343         } else {
6344                 if (back->node.found_extent_tree) {
6345                         fprintf(stderr, "Extent back ref already exists "
6346                                 "for %llu parent %llu root %llu "
6347                                 "owner %llu offset %llu num_refs %lu\n",
6348                                 (unsigned long long)bytenr,
6349                                 (unsigned long long)parent,
6350                                 (unsigned long long)root,
6351                                 (unsigned long long)owner,
6352                                 (unsigned long long)offset,
6353                                 (unsigned long)num_refs);
6354                 }
6355                 back->num_refs = num_refs;
6356                 back->node.found_extent_tree = 1;
6357         }
6358         maybe_free_extent_rec(extent_cache, rec);
6359         return 0;
6360 }
6361
6362 static int add_pending(struct cache_tree *pending,
6363                        struct cache_tree *seen, u64 bytenr, u32 size)
6364 {
6365         int ret;
6366         ret = add_cache_extent(seen, bytenr, size);
6367         if (ret)
6368                 return ret;
6369         add_cache_extent(pending, bytenr, size);
6370         return 0;
6371 }
6372
6373 static int pick_next_pending(struct cache_tree *pending,
6374                         struct cache_tree *reada,
6375                         struct cache_tree *nodes,
6376                         u64 last, struct block_info *bits, int bits_nr,
6377                         int *reada_bits)
6378 {
6379         unsigned long node_start = last;
6380         struct cache_extent *cache;
6381         int ret;
6382
6383         cache = search_cache_extent(reada, 0);
6384         if (cache) {
6385                 bits[0].start = cache->start;
6386                 bits[0].size = cache->size;
6387                 *reada_bits = 1;
6388                 return 1;
6389         }
6390         *reada_bits = 0;
6391         if (node_start > 32768)
6392                 node_start -= 32768;
6393
6394         cache = search_cache_extent(nodes, node_start);
6395         if (!cache)
6396                 cache = search_cache_extent(nodes, 0);
6397
6398         if (!cache) {
6399                  cache = search_cache_extent(pending, 0);
6400                  if (!cache)
6401                          return 0;
6402                  ret = 0;
6403                  do {
6404                          bits[ret].start = cache->start;
6405                          bits[ret].size = cache->size;
6406                          cache = next_cache_extent(cache);
6407                          ret++;
6408                  } while (cache && ret < bits_nr);
6409                  return ret;
6410         }
6411
6412         ret = 0;
6413         do {
6414                 bits[ret].start = cache->start;
6415                 bits[ret].size = cache->size;
6416                 cache = next_cache_extent(cache);
6417                 ret++;
6418         } while (cache && ret < bits_nr);
6419
6420         if (bits_nr - ret > 8) {
6421                 u64 lookup = bits[0].start + bits[0].size;
6422                 struct cache_extent *next;
6423                 next = search_cache_extent(pending, lookup);
6424                 while(next) {
6425                         if (next->start - lookup > 32768)
6426                                 break;
6427                         bits[ret].start = next->start;
6428                         bits[ret].size = next->size;
6429                         lookup = next->start + next->size;
6430                         ret++;
6431                         if (ret == bits_nr)
6432                                 break;
6433                         next = next_cache_extent(next);
6434                         if (!next)
6435                                 break;
6436                 }
6437         }
6438         return ret;
6439 }
6440
6441 static void free_chunk_record(struct cache_extent *cache)
6442 {
6443         struct chunk_record *rec;
6444
6445         rec = container_of(cache, struct chunk_record, cache);
6446         list_del_init(&rec->list);
6447         list_del_init(&rec->dextents);
6448         free(rec);
6449 }
6450
6451 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6452 {
6453         cache_tree_free_extents(chunk_cache, free_chunk_record);
6454 }
6455
6456 static void free_device_record(struct rb_node *node)
6457 {
6458         struct device_record *rec;
6459
6460         rec = container_of(node, struct device_record, node);
6461         free(rec);
6462 }
6463
6464 FREE_RB_BASED_TREE(device_cache, free_device_record);
6465
6466 int insert_block_group_record(struct block_group_tree *tree,
6467                               struct block_group_record *bg_rec)
6468 {
6469         int ret;
6470
6471         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6472         if (ret)
6473                 return ret;
6474
6475         list_add_tail(&bg_rec->list, &tree->block_groups);
6476         return 0;
6477 }
6478
6479 static void free_block_group_record(struct cache_extent *cache)
6480 {
6481         struct block_group_record *rec;
6482
6483         rec = container_of(cache, struct block_group_record, cache);
6484         list_del_init(&rec->list);
6485         free(rec);
6486 }
6487
6488 void free_block_group_tree(struct block_group_tree *tree)
6489 {
6490         cache_tree_free_extents(&tree->tree, free_block_group_record);
6491 }
6492
6493 int insert_device_extent_record(struct device_extent_tree *tree,
6494                                 struct device_extent_record *de_rec)
6495 {
6496         int ret;
6497
6498         /*
6499          * Device extent is a bit different from the other extents, because
6500          * the extents which belong to the different devices may have the
6501          * same start and size, so we need use the special extent cache
6502          * search/insert functions.
6503          */
6504         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6505         if (ret)
6506                 return ret;
6507
6508         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6509         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6510         return 0;
6511 }
6512
6513 static void free_device_extent_record(struct cache_extent *cache)
6514 {
6515         struct device_extent_record *rec;
6516
6517         rec = container_of(cache, struct device_extent_record, cache);
6518         if (!list_empty(&rec->chunk_list))
6519                 list_del_init(&rec->chunk_list);
6520         if (!list_empty(&rec->device_list))
6521                 list_del_init(&rec->device_list);
6522         free(rec);
6523 }
6524
6525 void free_device_extent_tree(struct device_extent_tree *tree)
6526 {
6527         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6528 }
6529
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, everything else as data backrefs; key.objectid is the
 * extent bytenr and key.offset the parent.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid, key.offset,
				0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
			0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6550
6551 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6552                                             struct btrfs_key *key,
6553                                             int slot)
6554 {
6555         struct btrfs_chunk *ptr;
6556         struct chunk_record *rec;
6557         int num_stripes, i;
6558
6559         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6560         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6561
6562         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6563         if (!rec) {
6564                 fprintf(stderr, "memory allocation failed\n");
6565                 exit(-1);
6566         }
6567
6568         INIT_LIST_HEAD(&rec->list);
6569         INIT_LIST_HEAD(&rec->dextents);
6570         rec->bg_rec = NULL;
6571
6572         rec->cache.start = key->offset;
6573         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6574
6575         rec->generation = btrfs_header_generation(leaf);
6576
6577         rec->objectid = key->objectid;
6578         rec->type = key->type;
6579         rec->offset = key->offset;
6580
6581         rec->length = rec->cache.size;
6582         rec->owner = btrfs_chunk_owner(leaf, ptr);
6583         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6584         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6585         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6586         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6587         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6588         rec->num_stripes = num_stripes;
6589         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6590
6591         for (i = 0; i < rec->num_stripes; ++i) {
6592                 rec->stripes[i].devid =
6593                         btrfs_stripe_devid_nr(leaf, ptr, i);
6594                 rec->stripes[i].offset =
6595                         btrfs_stripe_offset_nr(leaf, ptr, i);
6596                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6597                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6598                                 BTRFS_UUID_SIZE);
6599         }
6600
6601         return rec;
6602 }
6603
6604 static int process_chunk_item(struct cache_tree *chunk_cache,
6605                               struct btrfs_key *key, struct extent_buffer *eb,
6606                               int slot)
6607 {
6608         struct chunk_record *rec;
6609         struct btrfs_chunk *chunk;
6610         int ret = 0;
6611
6612         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6613         /*
6614          * Do extra check for this chunk item,
6615          *
6616          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6617          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6618          * and owner<->key_type check.
6619          */
6620         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6621                                       key->offset);
6622         if (ret < 0) {
6623                 error("chunk(%llu, %llu) is not valid, ignore it",
6624                       key->offset, btrfs_chunk_length(eb, chunk));
6625                 return 0;
6626         }
6627         rec = btrfs_new_chunk_record(eb, key, slot);
6628         ret = insert_cache_extent(chunk_cache, &rec->cache);
6629         if (ret) {
6630                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6631                         rec->offset, rec->length);
6632                 free(rec);
6633         }
6634
6635         return ret;
6636 }
6637
6638 static int process_device_item(struct rb_root *dev_cache,
6639                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6640 {
6641         struct btrfs_dev_item *ptr;
6642         struct device_record *rec;
6643         int ret = 0;
6644
6645         ptr = btrfs_item_ptr(eb,
6646                 slot, struct btrfs_dev_item);
6647
6648         rec = malloc(sizeof(*rec));
6649         if (!rec) {
6650                 fprintf(stderr, "memory allocation failed\n");
6651                 return -ENOMEM;
6652         }
6653
6654         rec->devid = key->offset;
6655         rec->generation = btrfs_header_generation(eb);
6656
6657         rec->objectid = key->objectid;
6658         rec->type = key->type;
6659         rec->offset = key->offset;
6660
6661         rec->devid = btrfs_device_id(eb, ptr);
6662         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6663         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6664
6665         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6666         if (ret) {
6667                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6668                 free(rec);
6669         }
6670
6671         return ret;
6672 }
6673
6674 struct block_group_record *
6675 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6676                              int slot)
6677 {
6678         struct btrfs_block_group_item *ptr;
6679         struct block_group_record *rec;
6680
6681         rec = calloc(1, sizeof(*rec));
6682         if (!rec) {
6683                 fprintf(stderr, "memory allocation failed\n");
6684                 exit(-1);
6685         }
6686
6687         rec->cache.start = key->objectid;
6688         rec->cache.size = key->offset;
6689
6690         rec->generation = btrfs_header_generation(leaf);
6691
6692         rec->objectid = key->objectid;
6693         rec->type = key->type;
6694         rec->offset = key->offset;
6695
6696         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6697         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6698
6699         INIT_LIST_HEAD(&rec->list);
6700
6701         return rec;
6702 }
6703
6704 static int process_block_group_item(struct block_group_tree *block_group_cache,
6705                                     struct btrfs_key *key,
6706                                     struct extent_buffer *eb, int slot)
6707 {
6708         struct block_group_record *rec;
6709         int ret = 0;
6710
6711         rec = btrfs_new_block_group_record(eb, key, slot);
6712         ret = insert_block_group_record(block_group_cache, rec);
6713         if (ret) {
6714                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6715                         rec->objectid, rec->offset);
6716                 free(rec);
6717         }
6718
6719         return ret;
6720 }
6721
6722 struct device_extent_record *
6723 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6724                                struct btrfs_key *key, int slot)
6725 {
6726         struct device_extent_record *rec;
6727         struct btrfs_dev_extent *ptr;
6728
6729         rec = calloc(1, sizeof(*rec));
6730         if (!rec) {
6731                 fprintf(stderr, "memory allocation failed\n");
6732                 exit(-1);
6733         }
6734
6735         rec->cache.objectid = key->objectid;
6736         rec->cache.start = key->offset;
6737
6738         rec->generation = btrfs_header_generation(leaf);
6739
6740         rec->objectid = key->objectid;
6741         rec->type = key->type;
6742         rec->offset = key->offset;
6743
6744         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6745         rec->chunk_objecteid =
6746                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6747         rec->chunk_offset =
6748                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6749         rec->length = btrfs_dev_extent_length(leaf, ptr);
6750         rec->cache.size = rec->length;
6751
6752         INIT_LIST_HEAD(&rec->chunk_list);
6753         INIT_LIST_HEAD(&rec->device_list);
6754
6755         return rec;
6756 }
6757
6758 static int
6759 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6760                            struct btrfs_key *key, struct extent_buffer *eb,
6761                            int slot)
6762 {
6763         struct device_extent_record *rec;
6764         int ret;
6765
6766         rec = btrfs_new_device_extent_record(eb, key, slot);
6767         ret = insert_device_extent_record(dev_extent_cache, rec);
6768         if (ret) {
6769                 fprintf(stderr,
6770                         "Device extent[%llu, %llu, %llu] existed.\n",
6771                         rec->objectid, rec->offset, rec->length);
6772                 free(rec);
6773         }
6774
6775         return ret;
6776 }
6777
6778 static int process_extent_item(struct btrfs_root *root,
6779                                struct cache_tree *extent_cache,
6780                                struct extent_buffer *eb, int slot)
6781 {
6782         struct btrfs_extent_item *ei;
6783         struct btrfs_extent_inline_ref *iref;
6784         struct btrfs_extent_data_ref *dref;
6785         struct btrfs_shared_data_ref *sref;
6786         struct btrfs_key key;
6787         struct extent_record tmpl;
6788         unsigned long end;
6789         unsigned long ptr;
6790         int ret;
6791         int type;
6792         u32 item_size = btrfs_item_size_nr(eb, slot);
6793         u64 refs = 0;
6794         u64 offset;
6795         u64 num_bytes;
6796         int metadata = 0;
6797
6798         btrfs_item_key_to_cpu(eb, &key, slot);
6799
6800         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6801                 metadata = 1;
6802                 num_bytes = root->nodesize;
6803         } else {
6804                 num_bytes = key.offset;
6805         }
6806
6807         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6808                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6809                       key.objectid, root->sectorsize);
6810                 return -EIO;
6811         }
6812         if (item_size < sizeof(*ei)) {
6813 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6814                 struct btrfs_extent_item_v0 *ei0;
6815                 BUG_ON(item_size != sizeof(*ei0));
6816                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6817                 refs = btrfs_extent_refs_v0(eb, ei0);
6818 #else
6819                 BUG();
6820 #endif
6821                 memset(&tmpl, 0, sizeof(tmpl));
6822                 tmpl.start = key.objectid;
6823                 tmpl.nr = num_bytes;
6824                 tmpl.extent_item_refs = refs;
6825                 tmpl.metadata = metadata;
6826                 tmpl.found_rec = 1;
6827                 tmpl.max_size = num_bytes;
6828
6829                 return add_extent_rec(extent_cache, &tmpl);
6830         }
6831
6832         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6833         refs = btrfs_extent_refs(eb, ei);
6834         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6835                 metadata = 1;
6836         else
6837                 metadata = 0;
6838         if (metadata && num_bytes != root->nodesize) {
6839                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6840                       num_bytes, root->nodesize);
6841                 return -EIO;
6842         }
6843         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6844                 error("ignore invalid data extent, length %llu is not aligned to %u",
6845                       num_bytes, root->sectorsize);
6846                 return -EIO;
6847         }
6848
6849         memset(&tmpl, 0, sizeof(tmpl));
6850         tmpl.start = key.objectid;
6851         tmpl.nr = num_bytes;
6852         tmpl.extent_item_refs = refs;
6853         tmpl.metadata = metadata;
6854         tmpl.found_rec = 1;
6855         tmpl.max_size = num_bytes;
6856         add_extent_rec(extent_cache, &tmpl);
6857
6858         ptr = (unsigned long)(ei + 1);
6859         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6860             key.type == BTRFS_EXTENT_ITEM_KEY)
6861                 ptr += sizeof(struct btrfs_tree_block_info);
6862
6863         end = (unsigned long)ei + item_size;
6864         while (ptr < end) {
6865                 iref = (struct btrfs_extent_inline_ref *)ptr;
6866                 type = btrfs_extent_inline_ref_type(eb, iref);
6867                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6868                 switch (type) {
6869                 case BTRFS_TREE_BLOCK_REF_KEY:
6870                         ret = add_tree_backref(extent_cache, key.objectid,
6871                                         0, offset, 0);
6872                         if (ret < 0)
6873                                 error(
6874                         "add_tree_backref failed (extent items tree block): %s",
6875                                       strerror(-ret));
6876                         break;
6877                 case BTRFS_SHARED_BLOCK_REF_KEY:
6878                         ret = add_tree_backref(extent_cache, key.objectid,
6879                                         offset, 0, 0);
6880                         if (ret < 0)
6881                                 error(
6882                         "add_tree_backref failed (extent items shared block): %s",
6883                                       strerror(-ret));
6884                         break;
6885                 case BTRFS_EXTENT_DATA_REF_KEY:
6886                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6887                         add_data_backref(extent_cache, key.objectid, 0,
6888                                         btrfs_extent_data_ref_root(eb, dref),
6889                                         btrfs_extent_data_ref_objectid(eb,
6890                                                                        dref),
6891                                         btrfs_extent_data_ref_offset(eb, dref),
6892                                         btrfs_extent_data_ref_count(eb, dref),
6893                                         0, num_bytes);
6894                         break;
6895                 case BTRFS_SHARED_DATA_REF_KEY:
6896                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6897                         add_data_backref(extent_cache, key.objectid, offset,
6898                                         0, 0, 0,
6899                                         btrfs_shared_data_ref_count(eb, sref),
6900                                         0, num_bytes);
6901                         break;
6902                 default:
6903                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6904                                 key.objectid, key.type, num_bytes);
6905                         goto out;
6906                 }
6907                 ptr += btrfs_extent_inline_ref_size(type);
6908         }
6909         WARN_ON(ptr > end);
6910 out:
6911         return 0;
6912 }
6913
6914 static int check_cache_range(struct btrfs_root *root,
6915                              struct btrfs_block_group_cache *cache,
6916                              u64 offset, u64 bytes)
6917 {
6918         struct btrfs_free_space *entry;
6919         u64 *logical;
6920         u64 bytenr;
6921         int stripe_len;
6922         int i, nr, ret;
6923
6924         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6925                 bytenr = btrfs_sb_offset(i);
6926                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6927                                        cache->key.objectid, bytenr, 0,
6928                                        &logical, &nr, &stripe_len);
6929                 if (ret)
6930                         return ret;
6931
6932                 while (nr--) {
6933                         if (logical[nr] + stripe_len <= offset)
6934                                 continue;
6935                         if (offset + bytes <= logical[nr])
6936                                 continue;
6937                         if (logical[nr] == offset) {
6938                                 if (stripe_len >= bytes) {
6939                                         free(logical);
6940                                         return 0;
6941                                 }
6942                                 bytes -= stripe_len;
6943                                 offset += stripe_len;
6944                         } else if (logical[nr] < offset) {
6945                                 if (logical[nr] + stripe_len >=
6946                                     offset + bytes) {
6947                                         free(logical);
6948                                         return 0;
6949                                 }
6950                                 bytes = (offset + bytes) -
6951                                         (logical[nr] + stripe_len);
6952                                 offset = logical[nr] + stripe_len;
6953                         } else {
6954                                 /*
6955                                  * Could be tricky, the super may land in the
6956                                  * middle of the area we're checking.  First
6957                                  * check the easiest case, it's at the end.
6958                                  */
6959                                 if (logical[nr] + stripe_len >=
6960                                     bytes + offset) {
6961                                         bytes = logical[nr] - offset;
6962                                         continue;
6963                                 }
6964
6965                                 /* Check the left side */
6966                                 ret = check_cache_range(root, cache,
6967                                                         offset,
6968                                                         logical[nr] - offset);
6969                                 if (ret) {
6970                                         free(logical);
6971                                         return ret;
6972                                 }
6973
6974                                 /* Now we continue with the right side */
6975                                 bytes = (offset + bytes) -
6976                                         (logical[nr] + stripe_len);
6977                                 offset = logical[nr] + stripe_len;
6978                         }
6979                 }
6980
6981                 free(logical);
6982         }
6983
6984         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6985         if (!entry) {
6986                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6987                         offset, offset+bytes);
6988                 return -EINVAL;
6989         }
6990
6991         if (entry->offset != offset) {
6992                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6993                         entry->offset);
6994                 return -EINVAL;
6995         }
6996
6997         if (entry->bytes != bytes) {
6998                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6999                         bytes, entry->bytes, offset);
7000                 return -EINVAL;
7001         }
7002
7003         unlink_free_space(cache->free_space_ctl, entry);
7004         free(entry);
7005         return 0;
7006 }
7007
7008 static int verify_space_cache(struct btrfs_root *root,
7009                               struct btrfs_block_group_cache *cache)
7010 {
7011         struct btrfs_path path;
7012         struct extent_buffer *leaf;
7013         struct btrfs_key key;
7014         u64 last;
7015         int ret = 0;
7016
7017         root = root->fs_info->extent_root;
7018
7019         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7020
7021         btrfs_init_path(&path);
7022         key.objectid = last;
7023         key.offset = 0;
7024         key.type = BTRFS_EXTENT_ITEM_KEY;
7025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7026         if (ret < 0)
7027                 goto out;
7028         ret = 0;
7029         while (1) {
7030                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7031                         ret = btrfs_next_leaf(root, &path);
7032                         if (ret < 0)
7033                                 goto out;
7034                         if (ret > 0) {
7035                                 ret = 0;
7036                                 break;
7037                         }
7038                 }
7039                 leaf = path.nodes[0];
7040                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7041                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7042                         break;
7043                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7044                     key.type != BTRFS_METADATA_ITEM_KEY) {
7045                         path.slots[0]++;
7046                         continue;
7047                 }
7048
7049                 if (last == key.objectid) {
7050                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7051                                 last = key.objectid + key.offset;
7052                         else
7053                                 last = key.objectid + root->nodesize;
7054                         path.slots[0]++;
7055                         continue;
7056                 }
7057
7058                 ret = check_cache_range(root, cache, last,
7059                                         key.objectid - last);
7060                 if (ret)
7061                         break;
7062                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7063                         last = key.objectid + key.offset;
7064                 else
7065                         last = key.objectid + root->nodesize;
7066                 path.slots[0]++;
7067         }
7068
7069         if (last < cache->key.objectid + cache->key.offset)
7070                 ret = check_cache_range(root, cache, last,
7071                                         cache->key.objectid +
7072                                         cache->key.offset - last);
7073
7074 out:
7075         btrfs_release_path(&path);
7076
7077         if (!ret &&
7078             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7079                 fprintf(stderr, "There are still entries left in the space "
7080                         "cache\n");
7081                 ret = -EINVAL;
7082         }
7083
7084         return ret;
7085 }
7086
7087 static int check_space_cache(struct btrfs_root *root)
7088 {
7089         struct btrfs_block_group_cache *cache;
7090         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7091         int ret;
7092         int error = 0;
7093
7094         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7095             btrfs_super_generation(root->fs_info->super_copy) !=
7096             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7097                 printf("cache and super generation don't match, space cache "
7098                        "will be invalidated\n");
7099                 return 0;
7100         }
7101
7102         if (ctx.progress_enabled) {
7103                 ctx.tp = TASK_FREE_SPACE;
7104                 task_start(ctx.info);
7105         }
7106
7107         while (1) {
7108                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7109                 if (!cache)
7110                         break;
7111
7112                 start = cache->key.objectid + cache->key.offset;
7113                 if (!cache->free_space_ctl) {
7114                         if (btrfs_init_free_space_ctl(cache,
7115                                                       root->sectorsize)) {
7116                                 ret = -ENOMEM;
7117                                 break;
7118                         }
7119                 } else {
7120                         btrfs_remove_free_space_cache(cache);
7121                 }
7122
7123                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7124                         ret = exclude_super_stripes(root, cache);
7125                         if (ret) {
7126                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7127                                         strerror(-ret));
7128                                 error++;
7129                                 continue;
7130                         }
7131                         ret = load_free_space_tree(root->fs_info, cache);
7132                         free_excluded_extents(root, cache);
7133                         if (ret < 0) {
7134                                 fprintf(stderr, "could not load free space tree: %s\n",
7135                                         strerror(-ret));
7136                                 error++;
7137                                 continue;
7138                         }
7139                         error += ret;
7140                 } else {
7141                         ret = load_free_space_cache(root->fs_info, cache);
7142                         if (!ret)
7143                                 continue;
7144                 }
7145
7146                 ret = verify_space_cache(root, cache);
7147                 if (ret) {
7148                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7149                                 cache->key.objectid);
7150                         error++;
7151                 }
7152         }
7153
7154         task_stop(ctx.info);
7155
7156         return error ? -EINVAL : 0;
7157 }
7158
7159 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7160                         u64 num_bytes, unsigned long leaf_offset,
7161                         struct extent_buffer *eb) {
7162
7163         u64 offset = 0;
7164         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7165         char *data;
7166         unsigned long csum_offset;
7167         u32 csum;
7168         u32 csum_expected;
7169         u64 read_len;
7170         u64 data_checked = 0;
7171         u64 tmp;
7172         int ret = 0;
7173         int mirror;
7174         int num_copies;
7175
7176         if (num_bytes % root->sectorsize)
7177                 return -EINVAL;
7178
7179         data = malloc(num_bytes);
7180         if (!data)
7181                 return -ENOMEM;
7182
7183         while (offset < num_bytes) {
7184                 mirror = 0;
7185 again:
7186                 read_len = num_bytes - offset;
7187                 /* read as much space once a time */
7188                 ret = read_extent_data(root, data + offset,
7189                                 bytenr + offset, &read_len, mirror);
7190                 if (ret)
7191                         goto out;
7192                 data_checked = 0;
7193                 /* verify every 4k data's checksum */
7194                 while (data_checked < read_len) {
7195                         csum = ~(u32)0;
7196                         tmp = offset + data_checked;
7197
7198                         csum = btrfs_csum_data((char *)data + tmp,
7199                                                csum, root->sectorsize);
7200                         btrfs_csum_final(csum, (u8 *)&csum);
7201
7202                         csum_offset = leaf_offset +
7203                                  tmp / root->sectorsize * csum_size;
7204                         read_extent_buffer(eb, (char *)&csum_expected,
7205                                            csum_offset, csum_size);
7206                         /* try another mirror */
7207                         if (csum != csum_expected) {
7208                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7209                                                 mirror, bytenr + tmp,
7210                                                 csum, csum_expected);
7211                                 num_copies = btrfs_num_copies(
7212                                                 &root->fs_info->mapping_tree,
7213                                                 bytenr, num_bytes);
7214                                 if (mirror < num_copies - 1) {
7215                                         mirror += 1;
7216                                         goto again;
7217                                 }
7218                         }
7219                         data_checked += root->sectorsize;
7220                 }
7221                 offset += read_len;
7222         }
7223 out:
7224         free(data);
7225         return ret;
7226 }
7227
7228 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7229                                u64 num_bytes)
7230 {
7231         struct btrfs_path path;
7232         struct extent_buffer *leaf;
7233         struct btrfs_key key;
7234         int ret;
7235
7236         btrfs_init_path(&path);
7237         key.objectid = bytenr;
7238         key.type = BTRFS_EXTENT_ITEM_KEY;
7239         key.offset = (u64)-1;
7240
7241 again:
7242         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7243                                 0, 0);
7244         if (ret < 0) {
7245                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7246                 btrfs_release_path(&path);
7247                 return ret;
7248         } else if (ret) {
7249                 if (path.slots[0] > 0) {
7250                         path.slots[0]--;
7251                 } else {
7252                         ret = btrfs_prev_leaf(root, &path);
7253                         if (ret < 0) {
7254                                 goto out;
7255                         } else if (ret > 0) {
7256                                 ret = 0;
7257                                 goto out;
7258                         }
7259                 }
7260         }
7261
7262         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7263
7264         /*
7265          * Block group items come before extent items if they have the same
7266          * bytenr, so walk back one more just in case.  Dear future traveller,
7267          * first congrats on mastering time travel.  Now if it's not too much
7268          * trouble could you go back to 2006 and tell Chris to make the
7269          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7270          * EXTENT_ITEM_KEY please?
7271          */
7272         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7273                 if (path.slots[0] > 0) {
7274                         path.slots[0]--;
7275                 } else {
7276                         ret = btrfs_prev_leaf(root, &path);
7277                         if (ret < 0) {
7278                                 goto out;
7279                         } else if (ret > 0) {
7280                                 ret = 0;
7281                                 goto out;
7282                         }
7283                 }
7284                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7285         }
7286
7287         while (num_bytes) {
7288                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7289                         ret = btrfs_next_leaf(root, &path);
7290                         if (ret < 0) {
7291                                 fprintf(stderr, "Error going to next leaf "
7292                                         "%d\n", ret);
7293                                 btrfs_release_path(&path);
7294                                 return ret;
7295                         } else if (ret) {
7296                                 break;
7297                         }
7298                 }
7299                 leaf = path.nodes[0];
7300                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7301                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7302                         path.slots[0]++;
7303                         continue;
7304                 }
7305                 if (key.objectid + key.offset < bytenr) {
7306                         path.slots[0]++;
7307                         continue;
7308                 }
7309                 if (key.objectid > bytenr + num_bytes)
7310                         break;
7311
7312                 if (key.objectid == bytenr) {
7313                         if (key.offset >= num_bytes) {
7314                                 num_bytes = 0;
7315                                 break;
7316                         }
7317                         num_bytes -= key.offset;
7318                         bytenr += key.offset;
7319                 } else if (key.objectid < bytenr) {
7320                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7321                                 num_bytes = 0;
7322                                 break;
7323                         }
7324                         num_bytes = (bytenr + num_bytes) -
7325                                 (key.objectid + key.offset);
7326                         bytenr = key.objectid + key.offset;
7327                 } else {
7328                         if (key.objectid + key.offset < bytenr + num_bytes) {
7329                                 u64 new_start = key.objectid + key.offset;
7330                                 u64 new_bytes = bytenr + num_bytes - new_start;
7331
7332                                 /*
7333                                  * Weird case, the extent is in the middle of
7334                                  * our range, we'll have to search one side
7335                                  * and then the other.  Not sure if this happens
7336                                  * in real life, but no harm in coding it up
7337                                  * anyway just in case.
7338                                  */
7339                                 btrfs_release_path(&path);
7340                                 ret = check_extent_exists(root, new_start,
7341                                                           new_bytes);
7342                                 if (ret) {
7343                                         fprintf(stderr, "Right section didn't "
7344                                                 "have a record\n");
7345                                         break;
7346                                 }
7347                                 num_bytes = key.objectid - bytenr;
7348                                 goto again;
7349                         }
7350                         num_bytes = key.objectid - bytenr;
7351                 }
7352                 path.slots[0]++;
7353         }
7354         ret = 0;
7355
7356 out:
7357         if (num_bytes && !ret) {
7358                 fprintf(stderr, "There are no extents for csum range "
7359                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7360                 ret = 1;
7361         }
7362
7363         btrfs_release_path(&path);
7364         return ret;
7365 }
7366
7367 static int check_csums(struct btrfs_root *root)
7368 {
7369         struct btrfs_path path;
7370         struct extent_buffer *leaf;
7371         struct btrfs_key key;
7372         u64 offset = 0, num_bytes = 0;
7373         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7374         int errors = 0;
7375         int ret;
7376         u64 data_len;
7377         unsigned long leaf_offset;
7378
7379         root = root->fs_info->csum_root;
7380         if (!extent_buffer_uptodate(root->node)) {
7381                 fprintf(stderr, "No valid csum tree found\n");
7382                 return -ENOENT;
7383         }
7384
7385         btrfs_init_path(&path);
7386         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7387         key.type = BTRFS_EXTENT_CSUM_KEY;
7388         key.offset = 0;
7389         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7390         if (ret < 0) {
7391                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7392                 btrfs_release_path(&path);
7393                 return ret;
7394         }
7395
7396         if (ret > 0 && path.slots[0])
7397                 path.slots[0]--;
7398         ret = 0;
7399
7400         while (1) {
7401                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7402                         ret = btrfs_next_leaf(root, &path);
7403                         if (ret < 0) {
7404                                 fprintf(stderr, "Error going to next leaf "
7405                                         "%d\n", ret);
7406                                 break;
7407                         }
7408                         if (ret)
7409                                 break;
7410                 }
7411                 leaf = path.nodes[0];
7412
7413                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7414                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7415                         path.slots[0]++;
7416                         continue;
7417                 }
7418
7419                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7420                               csum_size) * root->sectorsize;
7421                 if (!check_data_csum)
7422                         goto skip_csum_check;
7423                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7424                 ret = check_extent_csums(root, key.offset, data_len,
7425                                          leaf_offset, leaf);
7426                 if (ret)
7427                         break;
7428 skip_csum_check:
7429                 if (!num_bytes) {
7430                         offset = key.offset;
7431                 } else if (key.offset != offset + num_bytes) {
7432                         ret = check_extent_exists(root, offset, num_bytes);
7433                         if (ret) {
7434                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7435                                         "there is no extent record\n",
7436                                         offset, offset+num_bytes);
7437                                 errors++;
7438                         }
7439                         offset = key.offset;
7440                         num_bytes = 0;
7441                 }
7442                 num_bytes += data_len;
7443                 path.slots[0]++;
7444         }
7445
7446         btrfs_release_path(&path);
7447         return errors;
7448 }
7449
7450 static int is_dropped_key(struct btrfs_key *key,
7451                           struct btrfs_key *drop_key) {
7452         if (key->objectid < drop_key->objectid)
7453                 return 1;
7454         else if (key->objectid == drop_key->objectid) {
7455                 if (key->type < drop_key->type)
7456                         return 1;
7457                 else if (key->type == drop_key->type) {
7458                         if (key->offset < drop_key->offset)
7459                                 return 1;
7460                 }
7461         }
7462         return 0;
7463 }
7464
7465 /*
7466  * Here are the rules for FULL_BACKREF.
7467  *
7468  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7469  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7470  *      FULL_BACKREF set.
7471  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7472  *    if it happened after the relocation occurred since we'll have dropped the
7473  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7474  *    have no real way to know for sure.
7475  *
7476  * We process the blocks one root at a time, and we start from the lowest root
7477  * objectid and go to the highest.  So we can just lookup the owner backref for
7478  * the record and if we don't find it then we know it doesn't exist and we have
7479  * a FULL BACKREF.
7480  *
7481  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7482  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7483  * be set or not and then we can check later once we've gathered all the refs.
7484  */
7485 static int calc_extent_flag(struct cache_tree *extent_cache,
7486                            struct extent_buffer *buf,
7487                            struct root_item_record *ri,
7488                            u64 *flags)
7489 {
7490         struct extent_record *rec;
7491         struct cache_extent *cache;
7492         struct tree_backref *tback;
7493         u64 owner = 0;
7494
7495         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7496         /* we have added this extent before */
7497         if (!cache)
7498                 return -ENOENT;
7499
7500         rec = container_of(cache, struct extent_record, cache);
7501
7502         /*
7503          * Except file/reloc tree, we can not have
7504          * FULL BACKREF MODE
7505          */
7506         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7507                 goto normal;
7508         /*
7509          * root node
7510          */
7511         if (buf->start == ri->bytenr)
7512                 goto normal;
7513
7514         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7515                 goto full_backref;
7516
7517         owner = btrfs_header_owner(buf);
7518         if (owner == ri->objectid)
7519                 goto normal;
7520
7521         tback = find_tree_backref(rec, 0, owner);
7522         if (!tback)
7523                 goto full_backref;
7524 normal:
7525         *flags = 0;
7526         if (rec->flag_block_full_backref != FLAG_UNSET &&
7527             rec->flag_block_full_backref != 0)
7528                 rec->bad_full_backref = 1;
7529         return 0;
7530 full_backref:
7531         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7532         if (rec->flag_block_full_backref != FLAG_UNSET &&
7533             rec->flag_block_full_backref != 1)
7534                 rec->bad_full_backref = 1;
7535         return 0;
7536 }
7537
7538 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7539 {
7540         fprintf(stderr, "Invalid key type(");
7541         print_key_type(stderr, 0, key_type);
7542         fprintf(stderr, ") found in root(");
7543         print_objectid(stderr, rootid, 0);
7544         fprintf(stderr, ")\n");
7545 }
7546
7547 /*
7548  * Check if the key is valid with its extent buffer.
7549  *
7550  * This is a early check in case invalid key exists in a extent buffer
7551  * This is not comprehensive yet, but should prevent wrong key/item passed
7552  * further
7553  */
7554 static int check_type_with_root(u64 rootid, u8 key_type)
7555 {
7556         switch (key_type) {
7557         /* Only valid in chunk tree */
7558         case BTRFS_DEV_ITEM_KEY:
7559         case BTRFS_CHUNK_ITEM_KEY:
7560                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7561                         goto err;
7562                 break;
7563         /* valid in csum and log tree */
7564         case BTRFS_CSUM_TREE_OBJECTID:
7565                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7566                       is_fstree(rootid)))
7567                         goto err;
7568                 break;
7569         case BTRFS_EXTENT_ITEM_KEY:
7570         case BTRFS_METADATA_ITEM_KEY:
7571         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7572                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7573                         goto err;
7574                 break;
7575         case BTRFS_ROOT_ITEM_KEY:
7576                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7577                         goto err;
7578                 break;
7579         case BTRFS_DEV_EXTENT_KEY:
7580                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7581                         goto err;
7582                 break;
7583         }
7584         return 0;
7585 err:
7586         report_mismatch_key_root(key_type, rootid);
7587         return -EINVAL;
7588 }
7589
7590 static int run_next_block(struct btrfs_root *root,
7591                           struct block_info *bits,
7592                           int bits_nr,
7593                           u64 *last,
7594                           struct cache_tree *pending,
7595                           struct cache_tree *seen,
7596                           struct cache_tree *reada,
7597                           struct cache_tree *nodes,
7598                           struct cache_tree *extent_cache,
7599                           struct cache_tree *chunk_cache,
7600                           struct rb_root *dev_cache,
7601                           struct block_group_tree *block_group_cache,
7602                           struct device_extent_tree *dev_extent_cache,
7603                           struct root_item_record *ri)
7604 {
7605         struct extent_buffer *buf;
7606         struct extent_record *rec = NULL;
7607         u64 bytenr;
7608         u32 size;
7609         u64 parent;
7610         u64 owner;
7611         u64 flags;
7612         u64 ptr;
7613         u64 gen = 0;
7614         int ret = 0;
7615         int i;
7616         int nritems;
7617         struct btrfs_key key;
7618         struct cache_extent *cache;
7619         int reada_bits;
7620
7621         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7622                                     bits_nr, &reada_bits);
7623         if (nritems == 0)
7624                 return 1;
7625
7626         if (!reada_bits) {
7627                 for(i = 0; i < nritems; i++) {
7628                         ret = add_cache_extent(reada, bits[i].start,
7629                                                bits[i].size);
7630                         if (ret == -EEXIST)
7631                                 continue;
7632
7633                         /* fixme, get the parent transid */
7634                         readahead_tree_block(root, bits[i].start,
7635                                              bits[i].size, 0);
7636                 }
7637         }
7638         *last = bits[0].start;
7639         bytenr = bits[0].start;
7640         size = bits[0].size;
7641
7642         cache = lookup_cache_extent(pending, bytenr, size);
7643         if (cache) {
7644                 remove_cache_extent(pending, cache);
7645                 free(cache);
7646         }
7647         cache = lookup_cache_extent(reada, bytenr, size);
7648         if (cache) {
7649                 remove_cache_extent(reada, cache);
7650                 free(cache);
7651         }
7652         cache = lookup_cache_extent(nodes, bytenr, size);
7653         if (cache) {
7654                 remove_cache_extent(nodes, cache);
7655                 free(cache);
7656         }
7657         cache = lookup_cache_extent(extent_cache, bytenr, size);
7658         if (cache) {
7659                 rec = container_of(cache, struct extent_record, cache);
7660                 gen = rec->parent_generation;
7661         }
7662
7663         /* fixme, get the real parent transid */
7664         buf = read_tree_block(root, bytenr, size, gen);
7665         if (!extent_buffer_uptodate(buf)) {
7666                 record_bad_block_io(root->fs_info,
7667                                     extent_cache, bytenr, size);
7668                 goto out;
7669         }
7670
7671         nritems = btrfs_header_nritems(buf);
7672
7673         flags = 0;
7674         if (!init_extent_tree) {
7675                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7676                                        btrfs_header_level(buf), 1, NULL,
7677                                        &flags);
7678                 if (ret < 0) {
7679                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7680                         if (ret < 0) {
7681                                 fprintf(stderr, "Couldn't calc extent flags\n");
7682                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7683                         }
7684                 }
7685         } else {
7686                 flags = 0;
7687                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7688                 if (ret < 0) {
7689                         fprintf(stderr, "Couldn't calc extent flags\n");
7690                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7691                 }
7692         }
7693
7694         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7695                 if (ri != NULL &&
7696                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7697                     ri->objectid == btrfs_header_owner(buf)) {
7698                         /*
7699                          * Ok we got to this block from it's original owner and
7700                          * we have FULL_BACKREF set.  Relocation can leave
7701                          * converted blocks over so this is altogether possible,
7702                          * however it's not possible if the generation > the
7703                          * last snapshot, so check for this case.
7704                          */
7705                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7706                             btrfs_header_generation(buf) > ri->last_snapshot) {
7707                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7708                                 rec->bad_full_backref = 1;
7709                         }
7710                 }
7711         } else {
7712                 if (ri != NULL &&
7713                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7714                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7715                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7716                         rec->bad_full_backref = 1;
7717                 }
7718         }
7719
7720         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7721                 rec->flag_block_full_backref = 1;
7722                 parent = bytenr;
7723                 owner = 0;
7724         } else {
7725                 rec->flag_block_full_backref = 0;
7726                 parent = 0;
7727                 owner = btrfs_header_owner(buf);
7728         }
7729
7730         ret = check_block(root, extent_cache, buf, flags);
7731         if (ret)
7732                 goto out;
7733
7734         if (btrfs_is_leaf(buf)) {
7735                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7736                 for (i = 0; i < nritems; i++) {
7737                         struct btrfs_file_extent_item *fi;
7738                         btrfs_item_key_to_cpu(buf, &key, i);
7739                         /*
7740                          * Check key type against the leaf owner.
7741                          * Could filter quite a lot of early error if
7742                          * owner is correct
7743                          */
7744                         if (check_type_with_root(btrfs_header_owner(buf),
7745                                                  key.type)) {
7746                                 fprintf(stderr, "ignoring invalid key\n");
7747                                 continue;
7748                         }
7749                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7750                                 process_extent_item(root, extent_cache, buf,
7751                                                     i);
7752                                 continue;
7753                         }
7754                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7755                                 process_extent_item(root, extent_cache, buf,
7756                                                     i);
7757                                 continue;
7758                         }
7759                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7760                                 total_csum_bytes +=
7761                                         btrfs_item_size_nr(buf, i);
7762                                 continue;
7763                         }
7764                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7765                                 process_chunk_item(chunk_cache, &key, buf, i);
7766                                 continue;
7767                         }
7768                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7769                                 process_device_item(dev_cache, &key, buf, i);
7770                                 continue;
7771                         }
7772                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7773                                 process_block_group_item(block_group_cache,
7774                                         &key, buf, i);
7775                                 continue;
7776                         }
7777                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7778                                 process_device_extent_item(dev_extent_cache,
7779                                         &key, buf, i);
7780                                 continue;
7781
7782                         }
7783                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7784 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7785                                 process_extent_ref_v0(extent_cache, buf, i);
7786 #else
7787                                 BUG();
7788 #endif
7789                                 continue;
7790                         }
7791
7792                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7793                                 ret = add_tree_backref(extent_cache,
7794                                                 key.objectid, 0, key.offset, 0);
7795                                 if (ret < 0)
7796                                         error(
7797                                 "add_tree_backref failed (leaf tree block): %s",
7798                                               strerror(-ret));
7799                                 continue;
7800                         }
7801                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7802                                 ret = add_tree_backref(extent_cache,
7803                                                 key.objectid, key.offset, 0, 0);
7804                                 if (ret < 0)
7805                                         error(
7806                                 "add_tree_backref failed (leaf shared block): %s",
7807                                               strerror(-ret));
7808                                 continue;
7809                         }
7810                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7811                                 struct btrfs_extent_data_ref *ref;
7812                                 ref = btrfs_item_ptr(buf, i,
7813                                                 struct btrfs_extent_data_ref);
7814                                 add_data_backref(extent_cache,
7815                                         key.objectid, 0,
7816                                         btrfs_extent_data_ref_root(buf, ref),
7817                                         btrfs_extent_data_ref_objectid(buf,
7818                                                                        ref),
7819                                         btrfs_extent_data_ref_offset(buf, ref),
7820                                         btrfs_extent_data_ref_count(buf, ref),
7821                                         0, root->sectorsize);
7822                                 continue;
7823                         }
7824                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7825                                 struct btrfs_shared_data_ref *ref;
7826                                 ref = btrfs_item_ptr(buf, i,
7827                                                 struct btrfs_shared_data_ref);
7828                                 add_data_backref(extent_cache,
7829                                         key.objectid, key.offset, 0, 0, 0,
7830                                         btrfs_shared_data_ref_count(buf, ref),
7831                                         0, root->sectorsize);
7832                                 continue;
7833                         }
7834                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7835                                 struct bad_item *bad;
7836
7837                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7838                                         continue;
7839                                 if (!owner)
7840                                         continue;
7841                                 bad = malloc(sizeof(struct bad_item));
7842                                 if (!bad)
7843                                         continue;
7844                                 INIT_LIST_HEAD(&bad->list);
7845                                 memcpy(&bad->key, &key,
7846                                        sizeof(struct btrfs_key));
7847                                 bad->root_id = owner;
7848                                 list_add_tail(&bad->list, &delete_items);
7849                                 continue;
7850                         }
7851                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7852                                 continue;
7853                         fi = btrfs_item_ptr(buf, i,
7854                                             struct btrfs_file_extent_item);
7855                         if (btrfs_file_extent_type(buf, fi) ==
7856                             BTRFS_FILE_EXTENT_INLINE)
7857                                 continue;
7858                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7859                                 continue;
7860
7861                         data_bytes_allocated +=
7862                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7863                         if (data_bytes_allocated < root->sectorsize) {
7864                                 abort();
7865                         }
7866                         data_bytes_referenced +=
7867                                 btrfs_file_extent_num_bytes(buf, fi);
7868                         add_data_backref(extent_cache,
7869                                 btrfs_file_extent_disk_bytenr(buf, fi),
7870                                 parent, owner, key.objectid, key.offset -
7871                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7872                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7873                 }
7874         } else {
7875                 int level;
7876                 struct btrfs_key first_key;
7877
7878                 first_key.objectid = 0;
7879
7880                 if (nritems > 0)
7881                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7882                 level = btrfs_header_level(buf);
7883                 for (i = 0; i < nritems; i++) {
7884                         struct extent_record tmpl;
7885
7886                         ptr = btrfs_node_blockptr(buf, i);
7887                         size = root->nodesize;
7888                         btrfs_node_key_to_cpu(buf, &key, i);
7889                         if (ri != NULL) {
7890                                 if ((level == ri->drop_level)
7891                                     && is_dropped_key(&key, &ri->drop_key)) {
7892                                         continue;
7893                                 }
7894                         }
7895
7896                         memset(&tmpl, 0, sizeof(tmpl));
7897                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7898                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7899                         tmpl.start = ptr;
7900                         tmpl.nr = size;
7901                         tmpl.refs = 1;
7902                         tmpl.metadata = 1;
7903                         tmpl.max_size = size;
7904                         ret = add_extent_rec(extent_cache, &tmpl);
7905                         if (ret < 0)
7906                                 goto out;
7907
7908                         ret = add_tree_backref(extent_cache, ptr, parent,
7909                                         owner, 1);
7910                         if (ret < 0) {
7911                                 error(
7912                                 "add_tree_backref failed (non-leaf block): %s",
7913                                       strerror(-ret));
7914                                 continue;
7915                         }
7916
7917                         if (level > 1) {
7918                                 add_pending(nodes, seen, ptr, size);
7919                         } else {
7920                                 add_pending(pending, seen, ptr, size);
7921                         }
7922                 }
7923                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7924                                       nritems) * sizeof(struct btrfs_key_ptr);
7925         }
7926         total_btree_bytes += buf->len;
7927         if (fs_root_objectid(btrfs_header_owner(buf)))
7928                 total_fs_tree_bytes += buf->len;
7929         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7930                 total_extent_tree_bytes += buf->len;
7931         if (!found_old_backref &&
7932             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7933             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7934             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7935                 found_old_backref = 1;
7936 out:
7937         free_extent_buffer(buf);
7938         return ret;
7939 }
7940
7941 static int add_root_to_pending(struct extent_buffer *buf,
7942                                struct cache_tree *extent_cache,
7943                                struct cache_tree *pending,
7944                                struct cache_tree *seen,
7945                                struct cache_tree *nodes,
7946                                u64 objectid)
7947 {
7948         struct extent_record tmpl;
7949         int ret;
7950
7951         if (btrfs_header_level(buf) > 0)
7952                 add_pending(nodes, seen, buf->start, buf->len);
7953         else
7954                 add_pending(pending, seen, buf->start, buf->len);
7955
7956         memset(&tmpl, 0, sizeof(tmpl));
7957         tmpl.start = buf->start;
7958         tmpl.nr = buf->len;
7959         tmpl.is_root = 1;
7960         tmpl.refs = 1;
7961         tmpl.metadata = 1;
7962         tmpl.max_size = buf->len;
7963         add_extent_rec(extent_cache, &tmpl);
7964
7965         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7966             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7967                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7968                                 0, 1);
7969         else
7970                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7971                                 1);
7972         return ret;
7973 }
7974
7975 /* as we fix the tree, we might be deleting blocks that
7976  * we're tracking for repair.  This hook makes sure we
7977  * remove any backrefs for blocks as we are fixing them.
7978  */
7979 static int free_extent_hook(struct btrfs_trans_handle *trans,
7980                             struct btrfs_root *root,
7981                             u64 bytenr, u64 num_bytes, u64 parent,
7982                             u64 root_objectid, u64 owner, u64 offset,
7983                             int refs_to_drop)
7984 {
7985         struct extent_record *rec;
7986         struct cache_extent *cache;
7987         int is_data;
7988         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7989
7990         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7991         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7992         if (!cache)
7993                 return 0;
7994
7995         rec = container_of(cache, struct extent_record, cache);
7996         if (is_data) {
7997                 struct data_backref *back;
7998                 back = find_data_backref(rec, parent, root_objectid, owner,
7999                                          offset, 1, bytenr, num_bytes);
8000                 if (!back)
8001                         goto out;
8002                 if (back->node.found_ref) {
8003                         back->found_ref -= refs_to_drop;
8004                         if (rec->refs)
8005                                 rec->refs -= refs_to_drop;
8006                 }
8007                 if (back->node.found_extent_tree) {
8008                         back->num_refs -= refs_to_drop;
8009                         if (rec->extent_item_refs)
8010                                 rec->extent_item_refs -= refs_to_drop;
8011                 }
8012                 if (back->found_ref == 0)
8013                         back->node.found_ref = 0;
8014                 if (back->num_refs == 0)
8015                         back->node.found_extent_tree = 0;
8016
8017                 if (!back->node.found_extent_tree && back->node.found_ref) {
8018                         list_del(&back->node.list);
8019                         free(back);
8020                 }
8021         } else {
8022                 struct tree_backref *back;
8023                 back = find_tree_backref(rec, parent, root_objectid);
8024                 if (!back)
8025                         goto out;
8026                 if (back->node.found_ref) {
8027                         if (rec->refs)
8028                                 rec->refs--;
8029                         back->node.found_ref = 0;
8030                 }
8031                 if (back->node.found_extent_tree) {
8032                         if (rec->extent_item_refs)
8033                                 rec->extent_item_refs--;
8034                         back->node.found_extent_tree = 0;
8035                 }
8036                 if (!back->node.found_extent_tree && back->node.found_ref) {
8037                         list_del(&back->node.list);
8038                         free(back);
8039                 }
8040         }
8041         maybe_free_extent_rec(extent_cache, rec);
8042 out:
8043         return 0;
8044 }
8045
8046 static int delete_extent_records(struct btrfs_trans_handle *trans,
8047                                  struct btrfs_root *root,
8048                                  struct btrfs_path *path,
8049                                  u64 bytenr)
8050 {
8051         struct btrfs_key key;
8052         struct btrfs_key found_key;
8053         struct extent_buffer *leaf;
8054         int ret;
8055         int slot;
8056
8057
8058         key.objectid = bytenr;
8059         key.type = (u8)-1;
8060         key.offset = (u64)-1;
8061
8062         while(1) {
8063                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8064                                         &key, path, 0, 1);
8065                 if (ret < 0)
8066                         break;
8067
8068                 if (ret > 0) {
8069                         ret = 0;
8070                         if (path->slots[0] == 0)
8071                                 break;
8072                         path->slots[0]--;
8073                 }
8074                 ret = 0;
8075
8076                 leaf = path->nodes[0];
8077                 slot = path->slots[0];
8078
8079                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8080                 if (found_key.objectid != bytenr)
8081                         break;
8082
8083                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8084                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8085                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8086                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8087                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8088                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8089                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8090                         btrfs_release_path(path);
8091                         if (found_key.type == 0) {
8092                                 if (found_key.offset == 0)
8093                                         break;
8094                                 key.offset = found_key.offset - 1;
8095                                 key.type = found_key.type;
8096                         }
8097                         key.type = found_key.type - 1;
8098                         key.offset = (u64)-1;
8099                         continue;
8100                 }
8101
8102                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8103                         found_key.objectid, found_key.type, found_key.offset);
8104
8105                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8106                 if (ret)
8107                         break;
8108                 btrfs_release_path(path);
8109
8110                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8111                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8112                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8113                                 found_key.offset : root->nodesize;
8114
8115                         ret = btrfs_update_block_group(trans, root, bytenr,
8116                                                        bytes, 0, 0);
8117                         if (ret)
8118                                 break;
8119                 }
8120         }
8121
8122         btrfs_release_path(path);
8123         return ret;
8124 }
8125
8126 /*
8127  * for a single backref, this will allocate a new extent
8128  * and add the backref to it.
8129  */
8130 static int record_extent(struct btrfs_trans_handle *trans,
8131                          struct btrfs_fs_info *info,
8132                          struct btrfs_path *path,
8133                          struct extent_record *rec,
8134                          struct extent_backref *back,
8135                          int allocated, u64 flags)
8136 {
8137         int ret = 0;
8138         struct btrfs_root *extent_root = info->extent_root;
8139         struct extent_buffer *leaf;
8140         struct btrfs_key ins_key;
8141         struct btrfs_extent_item *ei;
8142         struct data_backref *dback;
8143         struct btrfs_tree_block_info *bi;
8144
8145         if (!back->is_data)
8146                 rec->max_size = max_t(u64, rec->max_size,
8147                                     info->extent_root->nodesize);
8148
8149         if (!allocated) {
8150                 u32 item_size = sizeof(*ei);
8151
8152                 if (!back->is_data)
8153                         item_size += sizeof(*bi);
8154
8155                 ins_key.objectid = rec->start;
8156                 ins_key.offset = rec->max_size;
8157                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8158
8159                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8160                                         &ins_key, item_size);
8161                 if (ret)
8162                         goto fail;
8163
8164                 leaf = path->nodes[0];
8165                 ei = btrfs_item_ptr(leaf, path->slots[0],
8166                                     struct btrfs_extent_item);
8167
8168                 btrfs_set_extent_refs(leaf, ei, 0);
8169                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8170
8171                 if (back->is_data) {
8172                         btrfs_set_extent_flags(leaf, ei,
8173                                                BTRFS_EXTENT_FLAG_DATA);
8174                 } else {
8175                         struct btrfs_disk_key copy_key;;
8176
8177                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8178                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8179                                              sizeof(*bi));
8180
8181                         btrfs_set_disk_key_objectid(&copy_key,
8182                                                     rec->info_objectid);
8183                         btrfs_set_disk_key_type(&copy_key, 0);
8184                         btrfs_set_disk_key_offset(&copy_key, 0);
8185
8186                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8187                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8188
8189                         btrfs_set_extent_flags(leaf, ei,
8190                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8191                 }
8192
8193                 btrfs_mark_buffer_dirty(leaf);
8194                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8195                                                rec->max_size, 1, 0);
8196                 if (ret)
8197                         goto fail;
8198                 btrfs_release_path(path);
8199         }
8200
8201         if (back->is_data) {
8202                 u64 parent;
8203                 int i;
8204
8205                 dback = to_data_backref(back);
8206                 if (back->full_backref)
8207                         parent = dback->parent;
8208                 else
8209                         parent = 0;
8210
8211                 for (i = 0; i < dback->found_ref; i++) {
8212                         /* if parent != 0, we're doing a full backref
8213                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8214                          * just makes the backref allocator create a data
8215                          * backref
8216                          */
8217                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8218                                                    rec->start, rec->max_size,
8219                                                    parent,
8220                                                    dback->root,
8221                                                    parent ?
8222                                                    BTRFS_FIRST_FREE_OBJECTID :
8223                                                    dback->owner,
8224                                                    dback->offset);
8225                         if (ret)
8226                                 break;
8227                 }
8228                 fprintf(stderr, "adding new data backref"
8229                                 " on %llu %s %llu owner %llu"
8230                                 " offset %llu found %d\n",
8231                                 (unsigned long long)rec->start,
8232                                 back->full_backref ?
8233                                 "parent" : "root",
8234                                 back->full_backref ?
8235                                 (unsigned long long)parent :
8236                                 (unsigned long long)dback->root,
8237                                 (unsigned long long)dback->owner,
8238                                 (unsigned long long)dback->offset,
8239                                 dback->found_ref);
8240         } else {
8241                 u64 parent;
8242                 struct tree_backref *tback;
8243
8244                 tback = to_tree_backref(back);
8245                 if (back->full_backref)
8246                         parent = tback->parent;
8247                 else
8248                         parent = 0;
8249
8250                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8251                                            rec->start, rec->max_size,
8252                                            parent, tback->root, 0, 0);
8253                 fprintf(stderr, "adding new tree backref on "
8254                         "start %llu len %llu parent %llu root %llu\n",
8255                         rec->start, rec->max_size, parent, tback->root);
8256         }
8257 fail:
8258         btrfs_release_path(path);
8259         return ret;
8260 }
8261
8262 static struct extent_entry *find_entry(struct list_head *entries,
8263                                        u64 bytenr, u64 bytes)
8264 {
8265         struct extent_entry *entry = NULL;
8266
8267         list_for_each_entry(entry, entries, list) {
8268                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8269                         return entry;
8270         }
8271
8272         return NULL;
8273 }
8274
/*
 * Pick the entry in @entries that the most backrefs agree on.
 *
 * Entries whose backrefs are all broken (broken == count) are ignored.  The
 * remaining entries are compared by their count.
 *
 * Returns the winning entry, or NULL when no winner could be determined
 * (no trusted entry at all, or the counts tie so that 'best' was reset and
 * never re-established).
 */
static struct extent_entry *find_most_right_entry(struct list_head *entries)
{
        struct extent_entry *entry, *best = NULL, *prev = NULL;

        list_for_each_entry(entry, entries, list) {
                /*
                 * If there are as many broken entries as entries then we know
                 * not to trust this particular entry.
                 */
                if (entry->broken == entry->count)
                        continue;

                /*
                 * First trusted entry we see: it is the provisional best and
                 * also the baseline (prev) that the next entry is compared
                 * against.  If no other trusted entry follows, it wins.
                 */
                if (!prev) {
                        best = entry;
                        prev = entry;
                        continue;
                }

                /*
                 * If our current entry == best then we can't be sure our best
                 * is really the best, so we need to keep searching.
                 */
                if (best && best->count == entry->count) {
                        prev = entry;
                        best = NULL;
                        continue;
                }

                /*
                 * prev has no broken backrefs and ties with entry on count:
                 * not good enough to decide, keep searching.  Note that
                 * neither prev nor best is updated on this path.
                 */
                if (!prev->broken && prev->count == entry->count)
                        continue;

                /*
                 * No current best: take whichever of prev/entry has the
                 * larger count.  Otherwise only replace best when entry
                 * strictly beats it.
                 */
                if (!best)
                        best = (prev->count > entry->count) ? prev : entry;
                else if (best->count < entry->count)
                        best = entry;
                prev = entry;
        }

        return best;
}
8320
8321 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8322                       struct data_backref *dback, struct extent_entry *entry)
8323 {
8324         struct btrfs_trans_handle *trans;
8325         struct btrfs_root *root;
8326         struct btrfs_file_extent_item *fi;
8327         struct extent_buffer *leaf;
8328         struct btrfs_key key;
8329         u64 bytenr, bytes;
8330         int ret, err;
8331
8332         key.objectid = dback->root;
8333         key.type = BTRFS_ROOT_ITEM_KEY;
8334         key.offset = (u64)-1;
8335         root = btrfs_read_fs_root(info, &key);
8336         if (IS_ERR(root)) {
8337                 fprintf(stderr, "Couldn't find root for our ref\n");
8338                 return -EINVAL;
8339         }
8340
8341         /*
8342          * The backref points to the original offset of the extent if it was
8343          * split, so we need to search down to the offset we have and then walk
8344          * forward until we find the backref we're looking for.
8345          */
8346         key.objectid = dback->owner;
8347         key.type = BTRFS_EXTENT_DATA_KEY;
8348         key.offset = dback->offset;
8349         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8350         if (ret < 0) {
8351                 fprintf(stderr, "Error looking up ref %d\n", ret);
8352                 return ret;
8353         }
8354
8355         while (1) {
8356                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8357                         ret = btrfs_next_leaf(root, path);
8358                         if (ret) {
8359                                 fprintf(stderr, "Couldn't find our ref, next\n");
8360                                 return -EINVAL;
8361                         }
8362                 }
8363                 leaf = path->nodes[0];
8364                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8365                 if (key.objectid != dback->owner ||
8366                     key.type != BTRFS_EXTENT_DATA_KEY) {
8367                         fprintf(stderr, "Couldn't find our ref, search\n");
8368                         return -EINVAL;
8369                 }
8370                 fi = btrfs_item_ptr(leaf, path->slots[0],
8371                                     struct btrfs_file_extent_item);
8372                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8373                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8374
8375                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8376                         break;
8377                 path->slots[0]++;
8378         }
8379
8380         btrfs_release_path(path);
8381
8382         trans = btrfs_start_transaction(root, 1);
8383         if (IS_ERR(trans))
8384                 return PTR_ERR(trans);
8385
8386         /*
8387          * Ok we have the key of the file extent we want to fix, now we can cow
8388          * down to the thing and fix it.
8389          */
8390         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8391         if (ret < 0) {
8392                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8393                         key.objectid, key.type, key.offset, ret);
8394                 goto out;
8395         }
8396         if (ret > 0) {
8397                 fprintf(stderr, "Well that's odd, we just found this key "
8398                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8399                         key.offset);
8400                 ret = -EINVAL;
8401                 goto out;
8402         }
8403         leaf = path->nodes[0];
8404         fi = btrfs_item_ptr(leaf, path->slots[0],
8405                             struct btrfs_file_extent_item);
8406
8407         if (btrfs_file_extent_compression(leaf, fi) &&
8408             dback->disk_bytenr != entry->bytenr) {
8409                 fprintf(stderr, "Ref doesn't match the record start and is "
8410                         "compressed, please take a btrfs-image of this file "
8411                         "system and send it to a btrfs developer so they can "
8412                         "complete this functionality for bytenr %Lu\n",
8413                         dback->disk_bytenr);
8414                 ret = -EINVAL;
8415                 goto out;
8416         }
8417
8418         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8419                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8420         } else if (dback->disk_bytenr > entry->bytenr) {
8421                 u64 off_diff, offset;
8422
8423                 off_diff = dback->disk_bytenr - entry->bytenr;
8424                 offset = btrfs_file_extent_offset(leaf, fi);
8425                 if (dback->disk_bytenr + offset +
8426                     btrfs_file_extent_num_bytes(leaf, fi) >
8427                     entry->bytenr + entry->bytes) {
8428                         fprintf(stderr, "Ref is past the entry end, please "
8429                                 "take a btrfs-image of this file system and "
8430                                 "send it to a btrfs developer, ref %Lu\n",
8431                                 dback->disk_bytenr);
8432                         ret = -EINVAL;
8433                         goto out;
8434                 }
8435                 offset += off_diff;
8436                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8437                 btrfs_set_file_extent_offset(leaf, fi, offset);
8438         } else if (dback->disk_bytenr < entry->bytenr) {
8439                 u64 offset;
8440
8441                 offset = btrfs_file_extent_offset(leaf, fi);
8442                 if (dback->disk_bytenr + offset < entry->bytenr) {
8443                         fprintf(stderr, "Ref is before the entry start, please"
8444                                 " take a btrfs-image of this file system and "
8445                                 "send it to a btrfs developer, ref %Lu\n",
8446                                 dback->disk_bytenr);
8447                         ret = -EINVAL;
8448                         goto out;
8449                 }
8450
8451                 offset += dback->disk_bytenr;
8452                 offset -= entry->bytenr;
8453                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8454                 btrfs_set_file_extent_offset(leaf, fi, offset);
8455         }
8456
8457         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8458
8459         /*
8460          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8461          * only do this if we aren't using compression, otherwise it's a
8462          * trickier case.
8463          */
8464         if (!btrfs_file_extent_compression(leaf, fi))
8465                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8466         else
8467                 printf("ram bytes may be wrong?\n");
8468         btrfs_mark_buffer_dirty(leaf);
8469 out:
8470         err = btrfs_commit_transaction(trans, root);
8471         btrfs_release_path(path);
8472         return ret ? ret : err;
8473 }
8474
8475 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8476                            struct extent_record *rec)
8477 {
8478         struct extent_backref *back;
8479         struct data_backref *dback;
8480         struct extent_entry *entry, *best = NULL;
8481         LIST_HEAD(entries);
8482         int nr_entries = 0;
8483         int broken_entries = 0;
8484         int ret = 0;
8485         short mismatch = 0;
8486
8487         /*
8488          * Metadata is easy and the backrefs should always agree on bytenr and
8489          * size, if not we've got bigger issues.
8490          */
8491         if (rec->metadata)
8492                 return 0;
8493
8494         list_for_each_entry(back, &rec->backrefs, list) {
8495                 if (back->full_backref || !back->is_data)
8496                         continue;
8497
8498                 dback = to_data_backref(back);
8499
8500                 /*
8501                  * We only pay attention to backrefs that we found a real
8502                  * backref for.
8503                  */
8504                 if (dback->found_ref == 0)
8505                         continue;
8506
8507                 /*
8508                  * For now we only catch when the bytes don't match, not the
8509                  * bytenr.  We can easily do this at the same time, but I want
8510                  * to have a fs image to test on before we just add repair
8511                  * functionality willy-nilly so we know we won't screw up the
8512                  * repair.
8513                  */
8514
8515                 entry = find_entry(&entries, dback->disk_bytenr,
8516                                    dback->bytes);
8517                 if (!entry) {
8518                         entry = malloc(sizeof(struct extent_entry));
8519                         if (!entry) {
8520                                 ret = -ENOMEM;
8521                                 goto out;
8522                         }
8523                         memset(entry, 0, sizeof(*entry));
8524                         entry->bytenr = dback->disk_bytenr;
8525                         entry->bytes = dback->bytes;
8526                         list_add_tail(&entry->list, &entries);
8527                         nr_entries++;
8528                 }
8529
8530                 /*
8531                  * If we only have on entry we may think the entries agree when
8532                  * in reality they don't so we have to do some extra checking.
8533                  */
8534                 if (dback->disk_bytenr != rec->start ||
8535                     dback->bytes != rec->nr || back->broken)
8536                         mismatch = 1;
8537
8538                 if (back->broken) {
8539                         entry->broken++;
8540                         broken_entries++;
8541                 }
8542
8543                 entry->count++;
8544         }
8545
8546         /* Yay all the backrefs agree, carry on good sir */
8547         if (nr_entries <= 1 && !mismatch)
8548                 goto out;
8549
8550         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8551                 "%Lu\n", rec->start);
8552
8553         /*
8554          * First we want to see if the backrefs can agree amongst themselves who
8555          * is right, so figure out which one of the entries has the highest
8556          * count.
8557          */
8558         best = find_most_right_entry(&entries);
8559
8560         /*
8561          * Ok so we may have an even split between what the backrefs think, so
8562          * this is where we use the extent ref to see what it thinks.
8563          */
8564         if (!best) {
8565                 entry = find_entry(&entries, rec->start, rec->nr);
8566                 if (!entry && (!broken_entries || !rec->found_rec)) {
8567                         fprintf(stderr, "Backrefs don't agree with each other "
8568                                 "and extent record doesn't agree with anybody,"
8569                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8570                                 rec->start, rec->nr);
8571                         ret = -EINVAL;
8572                         goto out;
8573                 } else if (!entry) {
8574                         /*
8575                          * Ok our backrefs were broken, we'll assume this is the
8576                          * correct value and add an entry for this range.
8577                          */
8578                         entry = malloc(sizeof(struct extent_entry));
8579                         if (!entry) {
8580                                 ret = -ENOMEM;
8581                                 goto out;
8582                         }
8583                         memset(entry, 0, sizeof(*entry));
8584                         entry->bytenr = rec->start;
8585                         entry->bytes = rec->nr;
8586                         list_add_tail(&entry->list, &entries);
8587                         nr_entries++;
8588                 }
8589                 entry->count++;
8590                 best = find_most_right_entry(&entries);
8591                 if (!best) {
8592                         fprintf(stderr, "Backrefs and extent record evenly "
8593                                 "split on who is right, this is going to "
8594                                 "require user input to fix bytenr %Lu bytes "
8595                                 "%Lu\n", rec->start, rec->nr);
8596                         ret = -EINVAL;
8597                         goto out;
8598                 }
8599         }
8600
8601         /*
8602          * I don't think this can happen currently as we'll abort() if we catch
8603          * this case higher up, but in case somebody removes that we still can't
8604          * deal with it properly here yet, so just bail out of that's the case.
8605          */
8606         if (best->bytenr != rec->start) {
8607                 fprintf(stderr, "Extent start and backref starts don't match, "
8608                         "please use btrfs-image on this file system and send "
8609                         "it to a btrfs developer so they can make fsck fix "
8610                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8611                         rec->start, rec->nr);
8612                 ret = -EINVAL;
8613                 goto out;
8614         }
8615
8616         /*
8617          * Ok great we all agreed on an extent record, let's go find the real
8618          * references and fix up the ones that don't match.
8619          */
8620         list_for_each_entry(back, &rec->backrefs, list) {
8621                 if (back->full_backref || !back->is_data)
8622                         continue;
8623
8624                 dback = to_data_backref(back);
8625
8626                 /*
8627                  * Still ignoring backrefs that don't have a real ref attached
8628                  * to them.
8629                  */
8630                 if (dback->found_ref == 0)
8631                         continue;
8632
8633                 if (dback->bytes == best->bytes &&
8634                     dback->disk_bytenr == best->bytenr)
8635                         continue;
8636
8637                 ret = repair_ref(info, path, dback, best);
8638                 if (ret)
8639                         goto out;
8640         }
8641
8642         /*
8643          * Ok we messed with the actual refs, which means we need to drop our
8644          * entire cache and go back and rescan.  I know this is a huge pain and
8645          * adds a lot of extra work, but it's the only way to be safe.  Once all
8646          * the backrefs agree we may not need to do anything to the extent
8647          * record itself.
8648          */
8649         ret = -EAGAIN;
8650 out:
8651         while (!list_empty(&entries)) {
8652                 entry = list_entry(entries.next, struct extent_entry, list);
8653                 list_del_init(&entry->list);
8654                 free(entry);
8655         }
8656         return ret;
8657 }
8658
/*
 * Replace an extent record that has no extent item of its own (@rec) with its
 * single duplicate, which was created from a real extent item.
 *
 * Nothing is done (and 0 is returned) when @rec has found_rec set or carries
 * more than one duplicate.  Otherwise @rec is removed from @extent_cache, its
 * duplicate ("good") inherits rec's refs and backrefs, every record in the
 * cache that overlaps good's range is merged into good or attached to
 * good->dups, good is inserted into @extent_cache, and @rec is freed.
 *
 * NOTE: on the non-early-return path @rec is freed; the caller must not use
 * it afterwards.
 *
 * Returns 1 if the replacement record ended up with no duplicates, 0
 * otherwise (including the early-return case).
 */
static int process_duplicates(struct cache_tree *extent_cache,
                              struct extent_record *rec)
{
        struct extent_record *good, *tmp;
        struct cache_extent *cache;
        int ret;

        /*
         * If we found an extent record for this extent then return, or if we
         * have more than one duplicate we are likely going to need to delete
         * something.
         */
        if (rec->found_rec || rec->num_duplicates > 1)
                return 0;

        /* Shouldn't happen but just in case */
        BUG_ON(!rec->num_duplicates);

        /*
         * So this happens if we end up with a backref that doesn't match the
         * actual extent entry.  So either the backref is bad or the extent
         * entry is bad.  Either way we want to have the extent_record actually
         * reflect what we found in the extent_tree, so we need to take the
         * duplicate out and use that as the extent_record since the only way we
         * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
         */
        remove_cache_extent(extent_cache, &rec->cache);

        /* Detach the one duplicate and reset it into a fresh record. */
        good = to_extent_record(rec->dups.next);
        list_del_init(&good->list);
        INIT_LIST_HEAD(&good->backrefs);
        INIT_LIST_HEAD(&good->dups);
        good->cache.start = good->start;
        good->cache.size = good->nr;
        good->content_checked = 0;
        good->owner_ref_checked = 0;
        good->num_duplicates = 0;
        /* The replacement takes over rec's ref count and backrefs. */
        good->refs = rec->refs;
        list_splice_init(&rec->backrefs, &good->backrefs);
        /*
         * Absorb every record still in the cache that overlaps good's range;
         * each iteration removes the record it found, so the loop ends once
         * the lookup comes back empty.
         */
        while (1) {
                cache = lookup_cache_extent(extent_cache, good->start,
                                            good->nr);
                if (!cache)
                        break;
                tmp = container_of(cache, struct extent_record, cache);

                /*
                 * If we find another overlapping extent and its found_rec is
                 * set then it's a duplicate and we need to try and delete
                 * something.
                 */
                if (tmp->found_rec || tmp->num_duplicates > 0) {
                        /* Queue good on the global duplicate list once. */
                        if (list_empty(&good->list))
                                list_add_tail(&good->list,
                                              &duplicate_extents);
                        /* tmp and all of its own dups become dups of good. */
                        good->num_duplicates += tmp->num_duplicates + 1;
                        list_splice_init(&tmp->dups, &good->dups);
                        list_del_init(&tmp->list);
                        list_add_tail(&tmp->list, &good->dups);
                        remove_cache_extent(extent_cache, &tmp->cache);
                        continue;
                }

                /*
                 * Ok we have another non extent item backed extent rec, so lets
                 * just add it to this extent and carry on like we did above.
                 */
                good->refs += tmp->refs;
                list_splice_init(&tmp->backrefs, &good->backrefs);
                remove_cache_extent(extent_cache, &tmp->cache);
                free(tmp);
        }
        ret = insert_cache_extent(extent_cache, &good->cache);
        BUG_ON(ret);
        free(rec);
        return good->num_duplicates ? 0 : 1;
}
8736
8737 static int delete_duplicate_records(struct btrfs_root *root,
8738                                     struct extent_record *rec)
8739 {
8740         struct btrfs_trans_handle *trans;
8741         LIST_HEAD(delete_list);
8742         struct btrfs_path path;
8743         struct extent_record *tmp, *good, *n;
8744         int nr_del = 0;
8745         int ret = 0, err;
8746         struct btrfs_key key;
8747
8748         btrfs_init_path(&path);
8749
8750         good = rec;
8751         /* Find the record that covers all of the duplicates. */
8752         list_for_each_entry(tmp, &rec->dups, list) {
8753                 if (good->start < tmp->start)
8754                         continue;
8755                 if (good->nr > tmp->nr)
8756                         continue;
8757
8758                 if (tmp->start + tmp->nr < good->start + good->nr) {
8759                         fprintf(stderr, "Ok we have overlapping extents that "
8760                                 "aren't completely covered by each other, this "
8761                                 "is going to require more careful thought.  "
8762                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8763                                 tmp->start, tmp->nr, good->start, good->nr);
8764                         abort();
8765                 }
8766                 good = tmp;
8767         }
8768
8769         if (good != rec)
8770                 list_add_tail(&rec->list, &delete_list);
8771
8772         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8773                 if (tmp == good)
8774                         continue;
8775                 list_move_tail(&tmp->list, &delete_list);
8776         }
8777
8778         root = root->fs_info->extent_root;
8779         trans = btrfs_start_transaction(root, 1);
8780         if (IS_ERR(trans)) {
8781                 ret = PTR_ERR(trans);
8782                 goto out;
8783         }
8784
8785         list_for_each_entry(tmp, &delete_list, list) {
8786                 if (tmp->found_rec == 0)
8787                         continue;
8788                 key.objectid = tmp->start;
8789                 key.type = BTRFS_EXTENT_ITEM_KEY;
8790                 key.offset = tmp->nr;
8791
8792                 /* Shouldn't happen but just in case */
8793                 if (tmp->metadata) {
8794                         fprintf(stderr, "Well this shouldn't happen, extent "
8795                                 "record overlaps but is metadata? "
8796                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8797                         abort();
8798                 }
8799
8800                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8801                 if (ret) {
8802                         if (ret > 0)
8803                                 ret = -EINVAL;
8804                         break;
8805                 }
8806                 ret = btrfs_del_item(trans, root, &path);
8807                 if (ret)
8808                         break;
8809                 btrfs_release_path(&path);
8810                 nr_del++;
8811         }
8812         err = btrfs_commit_transaction(trans, root);
8813         if (err && !ret)
8814                 ret = err;
8815 out:
8816         while (!list_empty(&delete_list)) {
8817                 tmp = to_extent_record(delete_list.next);
8818                 list_del_init(&tmp->list);
8819                 if (tmp == rec)
8820                         continue;
8821                 free(tmp);
8822         }
8823
8824         while (!list_empty(&rec->dups)) {
8825                 tmp = to_extent_record(rec->dups.next);
8826                 list_del_init(&tmp->list);
8827                 free(tmp);
8828         }
8829
8830         btrfs_release_path(&path);
8831
8832         if (!ret && !nr_del)
8833                 rec->num_duplicates = 0;
8834
8835         return ret ? ret : nr_del;
8836 }
8837
8838 static int find_possible_backrefs(struct btrfs_fs_info *info,
8839                                   struct btrfs_path *path,
8840                                   struct cache_tree *extent_cache,
8841                                   struct extent_record *rec)
8842 {
8843         struct btrfs_root *root;
8844         struct extent_backref *back;
8845         struct data_backref *dback;
8846         struct cache_extent *cache;
8847         struct btrfs_file_extent_item *fi;
8848         struct btrfs_key key;
8849         u64 bytenr, bytes;
8850         int ret;
8851
8852         list_for_each_entry(back, &rec->backrefs, list) {
8853                 /* Don't care about full backrefs (poor unloved backrefs) */
8854                 if (back->full_backref || !back->is_data)
8855                         continue;
8856
8857                 dback = to_data_backref(back);
8858
8859                 /* We found this one, we don't need to do a lookup */
8860                 if (dback->found_ref)
8861                         continue;
8862
8863                 key.objectid = dback->root;
8864                 key.type = BTRFS_ROOT_ITEM_KEY;
8865                 key.offset = (u64)-1;
8866
8867                 root = btrfs_read_fs_root(info, &key);
8868
8869                 /* No root, definitely a bad ref, skip */
8870                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8871                         continue;
8872                 /* Other err, exit */
8873                 if (IS_ERR(root))
8874                         return PTR_ERR(root);
8875
8876                 key.objectid = dback->owner;
8877                 key.type = BTRFS_EXTENT_DATA_KEY;
8878                 key.offset = dback->offset;
8879                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8880                 if (ret) {
8881                         btrfs_release_path(path);
8882                         if (ret < 0)
8883                                 return ret;
8884                         /* Didn't find it, we can carry on */
8885                         ret = 0;
8886                         continue;
8887                 }
8888
8889                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8890                                     struct btrfs_file_extent_item);
8891                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8892                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8893                 btrfs_release_path(path);
8894                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8895                 if (cache) {
8896                         struct extent_record *tmp;
8897                         tmp = container_of(cache, struct extent_record, cache);
8898
8899                         /*
8900                          * If we found an extent record for the bytenr for this
8901                          * particular backref then we can't add it to our
8902                          * current extent record.  We only want to add backrefs
8903                          * that don't have a corresponding extent item in the
8904                          * extent tree since they likely belong to this record
8905                          * and we need to fix it if it doesn't match bytenrs.
8906                          */
8907                         if  (tmp->found_rec)
8908                                 continue;
8909                 }
8910
8911                 dback->found_ref += 1;
8912                 dback->disk_bytenr = bytenr;
8913                 dback->bytes = bytes;
8914
8915                 /*
8916                  * Set this so the verify backref code knows not to trust the
8917                  * values in this backref.
8918                  */
8919                 back->broken = 1;
8920         }
8921
8922         return 0;
8923 }
8924
8925 /*
8926  * Record orphan data ref into corresponding root.
8927  *
8928  * Return 0 if the extent item contains data ref and recorded.
8929  * Return 1 if the extent item contains no useful data ref
8930  *   On that case, it may contains only shared_dataref or metadata backref
8931  *   or the file extent exists(this should be handled by the extent bytenr
8932  *   recovery routine)
8933  * Return <0 if something goes wrong.
8934  */
8935 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8936                                       struct extent_record *rec)
8937 {
8938         struct btrfs_key key;
8939         struct btrfs_root *dest_root;
8940         struct extent_backref *back;
8941         struct data_backref *dback;
8942         struct orphan_data_extent *orphan;
8943         struct btrfs_path path;
8944         int recorded_data_ref = 0;
8945         int ret = 0;
8946
8947         if (rec->metadata)
8948                 return 1;
8949         btrfs_init_path(&path);
8950         list_for_each_entry(back, &rec->backrefs, list) {
8951                 if (back->full_backref || !back->is_data ||
8952                     !back->found_extent_tree)
8953                         continue;
8954                 dback = to_data_backref(back);
8955                 if (dback->found_ref)
8956                         continue;
8957                 key.objectid = dback->root;
8958                 key.type = BTRFS_ROOT_ITEM_KEY;
8959                 key.offset = (u64)-1;
8960
8961                 dest_root = btrfs_read_fs_root(fs_info, &key);
8962
8963                 /* For non-exist root we just skip it */
8964                 if (IS_ERR(dest_root) || !dest_root)
8965                         continue;
8966
8967                 key.objectid = dback->owner;
8968                 key.type = BTRFS_EXTENT_DATA_KEY;
8969                 key.offset = dback->offset;
8970
8971                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8972                 btrfs_release_path(&path);
8973                 /*
8974                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8975                  * we need to record it for inode/file extent rebuild.
8976                  * For ret > 0, we record it only for file extent rebuild.
8977                  * For ret == 0, the file extent exists but only bytenr
8978                  * mismatch, let the original bytenr fix routine to handle,
8979                  * don't record it.
8980                  */
8981                 if (ret == 0)
8982                         continue;
8983                 ret = 0;
8984                 orphan = malloc(sizeof(*orphan));
8985                 if (!orphan) {
8986                         ret = -ENOMEM;
8987                         goto out;
8988                 }
8989                 INIT_LIST_HEAD(&orphan->list);
8990                 orphan->root = dback->root;
8991                 orphan->objectid = dback->owner;
8992                 orphan->offset = dback->offset;
8993                 orphan->disk_bytenr = rec->cache.start;
8994                 orphan->disk_len = rec->cache.size;
8995                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8996                 recorded_data_ref = 1;
8997         }
8998 out:
8999         btrfs_release_path(&path);
9000         if (!ret)
9001                 return !recorded_data_ref;
9002         else
9003                 return ret;
9004 }
9005
9006 /*
9007  * when an incorrect extent item is found, this will delete
9008  * all of the existing entries for it and recreate them
9009  * based on what the tree scan found.
9010  */
9011 static int fixup_extent_refs(struct btrfs_fs_info *info,
9012                              struct cache_tree *extent_cache,
9013                              struct extent_record *rec)
9014 {
9015         struct btrfs_trans_handle *trans = NULL;
9016         int ret;
9017         struct btrfs_path path;
9018         struct list_head *cur = rec->backrefs.next;
9019         struct cache_extent *cache;
9020         struct extent_backref *back;
9021         int allocated = 0;
9022         u64 flags = 0;
9023
9024         if (rec->flag_block_full_backref)
9025                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9026
9027         btrfs_init_path(&path);
9028         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9029                 /*
9030                  * Sometimes the backrefs themselves are so broken they don't
9031                  * get attached to any meaningful rec, so first go back and
9032                  * check any of our backrefs that we couldn't find and throw
9033                  * them into the list if we find the backref so that
9034                  * verify_backrefs can figure out what to do.
9035                  */
9036                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9037                 if (ret < 0)
9038                         goto out;
9039         }
9040
9041         /* step one, make sure all of the backrefs agree */
9042         ret = verify_backrefs(info, &path, rec);
9043         if (ret < 0)
9044                 goto out;
9045
9046         trans = btrfs_start_transaction(info->extent_root, 1);
9047         if (IS_ERR(trans)) {
9048                 ret = PTR_ERR(trans);
9049                 goto out;
9050         }
9051
9052         /* step two, delete all the existing records */
9053         ret = delete_extent_records(trans, info->extent_root, &path,
9054                                     rec->start);
9055
9056         if (ret < 0)
9057                 goto out;
9058
9059         /* was this block corrupt?  If so, don't add references to it */
9060         cache = lookup_cache_extent(info->corrupt_blocks,
9061                                     rec->start, rec->max_size);
9062         if (cache) {
9063                 ret = 0;
9064                 goto out;
9065         }
9066
9067         /* step three, recreate all the refs we did find */
9068         while(cur != &rec->backrefs) {
9069                 back = to_extent_backref(cur);
9070                 cur = cur->next;
9071
9072                 /*
9073                  * if we didn't find any references, don't create a
9074                  * new extent record
9075                  */
9076                 if (!back->found_ref)
9077                         continue;
9078
9079                 rec->bad_full_backref = 0;
9080                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9081                 allocated = 1;
9082
9083                 if (ret)
9084                         goto out;
9085         }
9086 out:
9087         if (trans) {
9088                 int err = btrfs_commit_transaction(trans, info->extent_root);
9089                 if (!ret)
9090                         ret = err;
9091         }
9092
9093         if (!ret)
9094                 fprintf(stderr, "Repaired extent references for %llu\n",
9095                                 (unsigned long long)rec->start);
9096
9097         btrfs_release_path(&path);
9098         return ret;
9099 }
9100
9101 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9102                               struct extent_record *rec)
9103 {
9104         struct btrfs_trans_handle *trans;
9105         struct btrfs_root *root = fs_info->extent_root;
9106         struct btrfs_path path;
9107         struct btrfs_extent_item *ei;
9108         struct btrfs_key key;
9109         u64 flags;
9110         int ret = 0;
9111
9112         key.objectid = rec->start;
9113         if (rec->metadata) {
9114                 key.type = BTRFS_METADATA_ITEM_KEY;
9115                 key.offset = rec->info_level;
9116         } else {
9117                 key.type = BTRFS_EXTENT_ITEM_KEY;
9118                 key.offset = rec->max_size;
9119         }
9120
9121         trans = btrfs_start_transaction(root, 0);
9122         if (IS_ERR(trans))
9123                 return PTR_ERR(trans);
9124
9125         btrfs_init_path(&path);
9126         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9127         if (ret < 0) {
9128                 btrfs_release_path(&path);
9129                 btrfs_commit_transaction(trans, root);
9130                 return ret;
9131         } else if (ret) {
9132                 fprintf(stderr, "Didn't find extent for %llu\n",
9133                         (unsigned long long)rec->start);
9134                 btrfs_release_path(&path);
9135                 btrfs_commit_transaction(trans, root);
9136                 return -ENOENT;
9137         }
9138
9139         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9140                             struct btrfs_extent_item);
9141         flags = btrfs_extent_flags(path.nodes[0], ei);
9142         if (rec->flag_block_full_backref) {
9143                 fprintf(stderr, "setting full backref on %llu\n",
9144                         (unsigned long long)key.objectid);
9145                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9146         } else {
9147                 fprintf(stderr, "clearing full backref on %llu\n",
9148                         (unsigned long long)key.objectid);
9149                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9150         }
9151         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9152         btrfs_mark_buffer_dirty(path.nodes[0]);
9153         btrfs_release_path(&path);
9154         ret = btrfs_commit_transaction(trans, root);
9155         if (!ret)
9156                 fprintf(stderr, "Repaired extent flags for %llu\n",
9157                                 (unsigned long long)rec->start);
9158
9159         return ret;
9160 }
9161
9162 /* right now we only prune from the extent allocation tree */
9163 static int prune_one_block(struct btrfs_trans_handle *trans,
9164                            struct btrfs_fs_info *info,
9165                            struct btrfs_corrupt_block *corrupt)
9166 {
9167         int ret;
9168         struct btrfs_path path;
9169         struct extent_buffer *eb;
9170         u64 found;
9171         int slot;
9172         int nritems;
9173         int level = corrupt->level + 1;
9174
9175         btrfs_init_path(&path);
9176 again:
9177         /* we want to stop at the parent to our busted block */
9178         path.lowest_level = level;
9179
9180         ret = btrfs_search_slot(trans, info->extent_root,
9181                                 &corrupt->key, &path, -1, 1);
9182
9183         if (ret < 0)
9184                 goto out;
9185
9186         eb = path.nodes[level];
9187         if (!eb) {
9188                 ret = -ENOENT;
9189                 goto out;
9190         }
9191
9192         /*
9193          * hopefully the search gave us the block we want to prune,
9194          * lets try that first
9195          */
9196         slot = path.slots[level];
9197         found =  btrfs_node_blockptr(eb, slot);
9198         if (found == corrupt->cache.start)
9199                 goto del_ptr;
9200
9201         nritems = btrfs_header_nritems(eb);
9202
9203         /* the search failed, lets scan this node and hope we find it */
9204         for (slot = 0; slot < nritems; slot++) {
9205                 found =  btrfs_node_blockptr(eb, slot);
9206                 if (found == corrupt->cache.start)
9207                         goto del_ptr;
9208         }
9209         /*
9210          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9211          * to this block
9212          */
9213         if (eb == info->extent_root->node) {
9214                 ret = -ENOENT;
9215                 goto out;
9216         } else {
9217                 level++;
9218                 btrfs_release_path(&path);
9219                 goto again;
9220         }
9221
9222 del_ptr:
9223         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9224         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9225
9226 out:
9227         btrfs_release_path(&path);
9228         return ret;
9229 }
9230
9231 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9232 {
9233         struct btrfs_trans_handle *trans = NULL;
9234         struct cache_extent *cache;
9235         struct btrfs_corrupt_block *corrupt;
9236
9237         while (1) {
9238                 cache = search_cache_extent(info->corrupt_blocks, 0);
9239                 if (!cache)
9240                         break;
9241                 if (!trans) {
9242                         trans = btrfs_start_transaction(info->extent_root, 1);
9243                         if (IS_ERR(trans))
9244                                 return PTR_ERR(trans);
9245                 }
9246                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9247                 prune_one_block(trans, info, corrupt);
9248                 remove_cache_extent(info->corrupt_blocks, cache);
9249         }
9250         if (trans)
9251                 return btrfs_commit_transaction(trans, info->extent_root);
9252         return 0;
9253 }
9254
9255 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9256 {
9257         struct btrfs_block_group_cache *cache;
9258         u64 start, end;
9259         int ret;
9260
9261         while (1) {
9262                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9263                                             &start, &end, EXTENT_DIRTY);
9264                 if (ret)
9265                         break;
9266                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9267         }
9268
9269         start = 0;
9270         while (1) {
9271                 cache = btrfs_lookup_first_block_group(fs_info, start);
9272                 if (!cache)
9273                         break;
9274                 if (cache->cached)
9275                         cache->cached = 0;
9276                 start = cache->key.objectid + cache->key.offset;
9277         }
9278 }
9279
9280 static int check_extent_refs(struct btrfs_root *root,
9281                              struct cache_tree *extent_cache)
9282 {
9283         struct extent_record *rec;
9284         struct cache_extent *cache;
9285         int ret = 0;
9286         int had_dups = 0;
9287
9288         if (repair) {
9289                 /*
9290                  * if we're doing a repair, we have to make sure
9291                  * we don't allocate from the problem extents.
9292                  * In the worst case, this will be all the
9293                  * extents in the FS
9294                  */
9295                 cache = search_cache_extent(extent_cache, 0);
9296                 while(cache) {
9297                         rec = container_of(cache, struct extent_record, cache);
9298                         set_extent_dirty(root->fs_info->excluded_extents,
9299                                          rec->start,
9300                                          rec->start + rec->max_size - 1);
9301                         cache = next_cache_extent(cache);
9302                 }
9303
9304                 /* pin down all the corrupted blocks too */
9305                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9306                 while(cache) {
9307                         set_extent_dirty(root->fs_info->excluded_extents,
9308                                          cache->start,
9309                                          cache->start + cache->size - 1);
9310                         cache = next_cache_extent(cache);
9311                 }
9312                 prune_corrupt_blocks(root->fs_info);
9313                 reset_cached_block_groups(root->fs_info);
9314         }
9315
9316         reset_cached_block_groups(root->fs_info);
9317
9318         /*
9319          * We need to delete any duplicate entries we find first otherwise we
9320          * could mess up the extent tree when we have backrefs that actually
9321          * belong to a different extent item and not the weird duplicate one.
9322          */
9323         while (repair && !list_empty(&duplicate_extents)) {
9324                 rec = to_extent_record(duplicate_extents.next);
9325                 list_del_init(&rec->list);
9326
9327                 /* Sometimes we can find a backref before we find an actual
9328                  * extent, so we need to process it a little bit to see if there
9329                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9330                  * if this is a backref screwup.  If we need to delete stuff
9331                  * process_duplicates() will return 0, otherwise it will return
9332                  * 1 and we
9333                  */
9334                 if (process_duplicates(extent_cache, rec))
9335                         continue;
9336                 ret = delete_duplicate_records(root, rec);
9337                 if (ret < 0)
9338                         return ret;
9339                 /*
9340                  * delete_duplicate_records will return the number of entries
9341                  * deleted, so if it's greater than 0 then we know we actually
9342                  * did something and we need to remove.
9343                  */
9344                 if (ret)
9345                         had_dups = 1;
9346         }
9347
9348         if (had_dups)
9349                 return -EAGAIN;
9350
9351         while(1) {
9352                 int cur_err = 0;
9353                 int fix = 0;
9354
9355                 cache = search_cache_extent(extent_cache, 0);
9356                 if (!cache)
9357                         break;
9358                 rec = container_of(cache, struct extent_record, cache);
9359                 if (rec->num_duplicates) {
9360                         fprintf(stderr, "extent item %llu has multiple extent "
9361                                 "items\n", (unsigned long long)rec->start);
9362                         cur_err = 1;
9363                 }
9364
9365                 if (rec->refs != rec->extent_item_refs) {
9366                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9367                                 (unsigned long long)rec->start,
9368                                 (unsigned long long)rec->nr);
9369                         fprintf(stderr, "extent item %llu, found %llu\n",
9370                                 (unsigned long long)rec->extent_item_refs,
9371                                 (unsigned long long)rec->refs);
9372                         ret = record_orphan_data_extents(root->fs_info, rec);
9373                         if (ret < 0)
9374                                 goto repair_abort;
9375                         fix = ret;
9376                         cur_err = 1;
9377                 }
9378                 if (all_backpointers_checked(rec, 1)) {
9379                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9380                                 (unsigned long long)rec->start,
9381                                 (unsigned long long)rec->nr);
9382                         fix = 1;
9383                         cur_err = 1;
9384                 }
9385                 if (!rec->owner_ref_checked) {
9386                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9387                                 (unsigned long long)rec->start,
9388                                 (unsigned long long)rec->nr);
9389                         fix = 1;
9390                         cur_err = 1;
9391                 }
9392
9393                 if (repair && fix) {
9394                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9395                         if (ret)
9396                                 goto repair_abort;
9397                 }
9398
9399
9400                 if (rec->bad_full_backref) {
9401                         fprintf(stderr, "bad full backref, on [%llu]\n",
9402                                 (unsigned long long)rec->start);
9403                         if (repair) {
9404                                 ret = fixup_extent_flags(root->fs_info, rec);
9405                                 if (ret)
9406                                         goto repair_abort;
9407                                 fix = 1;
9408                         }
9409                         cur_err = 1;
9410                 }
9411                 /*
9412                  * Although it's not a extent ref's problem, we reuse this
9413                  * routine for error reporting.
9414                  * No repair function yet.
9415                  */
9416                 if (rec->crossing_stripes) {
9417                         fprintf(stderr,
9418                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9419                                 rec->start, rec->start + rec->max_size);
9420                         cur_err = 1;
9421                 }
9422
9423                 if (rec->wrong_chunk_type) {
9424                         fprintf(stderr,
9425                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9426                                 rec->start, rec->start + rec->max_size);
9427                         cur_err = 1;
9428                 }
9429
9430                 remove_cache_extent(extent_cache, cache);
9431                 free_all_extent_backrefs(rec);
9432                 if (!init_extent_tree && repair && (!cur_err || fix))
9433                         clear_extent_dirty(root->fs_info->excluded_extents,
9434                                            rec->start,
9435                                            rec->start + rec->max_size - 1);
9436                 free(rec);
9437         }
9438 repair_abort:
9439         if (repair) {
9440                 if (ret && ret != -EAGAIN) {
9441                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9442                         exit(1);
9443                 } else if (!ret) {
9444                         struct btrfs_trans_handle *trans;
9445
9446                         root = root->fs_info->extent_root;
9447                         trans = btrfs_start_transaction(root, 1);
9448                         if (IS_ERR(trans)) {
9449                                 ret = PTR_ERR(trans);
9450                                 goto repair_abort;
9451                         }
9452
9453                         btrfs_fix_block_accounting(trans, root);
9454                         ret = btrfs_commit_transaction(trans, root);
9455                         if (ret)
9456                                 goto repair_abort;
9457                 }
9458                 return ret;
9459         }
9460         return 0;
9461 }
9462
9463 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9464 {
9465         u64 stripe_size;
9466
9467         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9468                 stripe_size = length;
9469                 stripe_size /= num_stripes;
9470         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9471                 stripe_size = length * 2;
9472                 stripe_size /= num_stripes;
9473         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9474                 stripe_size = length;
9475                 stripe_size /= (num_stripes - 1);
9476         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9477                 stripe_size = length;
9478                 stripe_size /= (num_stripes - 2);
9479         } else {
9480                 stripe_size = length;
9481         }
9482         return stripe_size;
9483 }
9484
9485 /*
9486  * Check the chunk with its block group/dev list ref:
9487  * Return 0 if all refs seems valid.
9488  * Return 1 if part of refs seems valid, need later check for rebuild ref
9489  * like missing block group and needs to search extent tree to rebuild them.
9490  * Return -1 if essential refs are missing and unable to rebuild.
9491  */
9492 static int check_chunk_refs(struct chunk_record *chunk_rec,
9493                             struct block_group_tree *block_group_cache,
9494                             struct device_extent_tree *dev_extent_cache,
9495                             int silent)
9496 {
9497         struct cache_extent *block_group_item;
9498         struct block_group_record *block_group_rec;
9499         struct cache_extent *dev_extent_item;
9500         struct device_extent_record *dev_extent_rec;
9501         u64 devid;
9502         u64 offset;
9503         u64 length;
9504         int metadump_v2 = 0;
9505         int i;
9506         int ret = 0;
9507
9508         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9509                                                chunk_rec->offset,
9510                                                chunk_rec->length);
9511         if (block_group_item) {
9512                 block_group_rec = container_of(block_group_item,
9513                                                struct block_group_record,
9514                                                cache);
9515                 if (chunk_rec->length != block_group_rec->offset ||
9516                     chunk_rec->offset != block_group_rec->objectid ||
9517                     (!metadump_v2 &&
9518                      chunk_rec->type_flags != block_group_rec->flags)) {
9519                         if (!silent)
9520                                 fprintf(stderr,
9521                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9522                                         chunk_rec->objectid,
9523                                         chunk_rec->type,
9524                                         chunk_rec->offset,
9525                                         chunk_rec->length,
9526                                         chunk_rec->offset,
9527                                         chunk_rec->type_flags,
9528                                         block_group_rec->objectid,
9529                                         block_group_rec->type,
9530                                         block_group_rec->offset,
9531                                         block_group_rec->offset,
9532                                         block_group_rec->objectid,
9533                                         block_group_rec->flags);
9534                         ret = -1;
9535                 } else {
9536                         list_del_init(&block_group_rec->list);
9537                         chunk_rec->bg_rec = block_group_rec;
9538                 }
9539         } else {
9540                 if (!silent)
9541                         fprintf(stderr,
9542                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9543                                 chunk_rec->objectid,
9544                                 chunk_rec->type,
9545                                 chunk_rec->offset,
9546                                 chunk_rec->length,
9547                                 chunk_rec->offset,
9548                                 chunk_rec->type_flags);
9549                 ret = 1;
9550         }
9551
9552         if (metadump_v2)
9553                 return ret;
9554
9555         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9556                                     chunk_rec->num_stripes);
9557         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9558                 devid = chunk_rec->stripes[i].devid;
9559                 offset = chunk_rec->stripes[i].offset;
9560                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9561                                                        devid, offset, length);
9562                 if (dev_extent_item) {
9563                         dev_extent_rec = container_of(dev_extent_item,
9564                                                 struct device_extent_record,
9565                                                 cache);
9566                         if (dev_extent_rec->objectid != devid ||
9567                             dev_extent_rec->offset != offset ||
9568                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9569                             dev_extent_rec->length != length) {
9570                                 if (!silent)
9571                                         fprintf(stderr,
9572                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9573                                                 chunk_rec->objectid,
9574                                                 chunk_rec->type,
9575                                                 chunk_rec->offset,
9576                                                 chunk_rec->stripes[i].devid,
9577                                                 chunk_rec->stripes[i].offset,
9578                                                 dev_extent_rec->objectid,
9579                                                 dev_extent_rec->offset,
9580                                                 dev_extent_rec->length);
9581                                 ret = -1;
9582                         } else {
9583                                 list_move(&dev_extent_rec->chunk_list,
9584                                           &chunk_rec->dextents);
9585                         }
9586                 } else {
9587                         if (!silent)
9588                                 fprintf(stderr,
9589                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9590                                         chunk_rec->objectid,
9591                                         chunk_rec->type,
9592                                         chunk_rec->offset,
9593                                         chunk_rec->stripes[i].devid,
9594                                         chunk_rec->stripes[i].offset);
9595                         ret = -1;
9596                 }
9597         }
9598         return ret;
9599 }
9600
9601 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9602 int check_chunks(struct cache_tree *chunk_cache,
9603                  struct block_group_tree *block_group_cache,
9604                  struct device_extent_tree *dev_extent_cache,
9605                  struct list_head *good, struct list_head *bad,
9606                  struct list_head *rebuild, int silent)
9607 {
9608         struct cache_extent *chunk_item;
9609         struct chunk_record *chunk_rec;
9610         struct block_group_record *bg_rec;
9611         struct device_extent_record *dext_rec;
9612         int err;
9613         int ret = 0;
9614
9615         chunk_item = first_cache_extent(chunk_cache);
9616         while (chunk_item) {
9617                 chunk_rec = container_of(chunk_item, struct chunk_record,
9618                                          cache);
9619                 err = check_chunk_refs(chunk_rec, block_group_cache,
9620                                        dev_extent_cache, silent);
9621                 if (err < 0)
9622                         ret = err;
9623                 if (err == 0 && good)
9624                         list_add_tail(&chunk_rec->list, good);
9625                 if (err > 0 && rebuild)
9626                         list_add_tail(&chunk_rec->list, rebuild);
9627                 if (err < 0 && bad)
9628                         list_add_tail(&chunk_rec->list, bad);
9629                 chunk_item = next_cache_extent(chunk_item);
9630         }
9631
9632         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9633                 if (!silent)
9634                         fprintf(stderr,
9635                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9636                                 bg_rec->objectid,
9637                                 bg_rec->offset,
9638                                 bg_rec->flags);
9639                 if (!ret)
9640                         ret = 1;
9641         }
9642
9643         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9644                             chunk_list) {
9645                 if (!silent)
9646                         fprintf(stderr,
9647                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9648                                 dext_rec->objectid,
9649                                 dext_rec->offset,
9650                                 dext_rec->length);
9651                 if (!ret)
9652                         ret = 1;
9653         }
9654         return ret;
9655 }
9656
9657
9658 static int check_device_used(struct device_record *dev_rec,
9659                              struct device_extent_tree *dext_cache)
9660 {
9661         struct cache_extent *cache;
9662         struct device_extent_record *dev_extent_rec;
9663         u64 total_byte = 0;
9664
9665         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9666         while (cache) {
9667                 dev_extent_rec = container_of(cache,
9668                                               struct device_extent_record,
9669                                               cache);
9670                 if (dev_extent_rec->objectid != dev_rec->devid)
9671                         break;
9672
9673                 list_del_init(&dev_extent_rec->device_list);
9674                 total_byte += dev_extent_rec->length;
9675                 cache = next_cache_extent(cache);
9676         }
9677
9678         if (total_byte != dev_rec->byte_used) {
9679                 fprintf(stderr,
9680                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9681                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9682                         dev_rec->type, dev_rec->offset);
9683                 return -1;
9684         } else {
9685                 return 0;
9686         }
9687 }
9688
9689 /* check btrfs_dev_item -> btrfs_dev_extent */
9690 static int check_devices(struct rb_root *dev_cache,
9691                          struct device_extent_tree *dev_extent_cache)
9692 {
9693         struct rb_node *dev_node;
9694         struct device_record *dev_rec;
9695         struct device_extent_record *dext_rec;
9696         int err;
9697         int ret = 0;
9698
9699         dev_node = rb_first(dev_cache);
9700         while (dev_node) {
9701                 dev_rec = container_of(dev_node, struct device_record, node);
9702                 err = check_device_used(dev_rec, dev_extent_cache);
9703                 if (err)
9704                         ret = err;
9705
9706                 dev_node = rb_next(dev_node);
9707         }
9708         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9709                             device_list) {
9710                 fprintf(stderr,
9711                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9712                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9713                 if (!ret)
9714                         ret = 1;
9715         }
9716         return ret;
9717 }
9718
9719 static int add_root_item_to_list(struct list_head *head,
9720                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9721                                   u8 level, u8 drop_level,
9722                                   int level_size, struct btrfs_key *drop_key)
9723 {
9724
9725         struct root_item_record *ri_rec;
9726         ri_rec = malloc(sizeof(*ri_rec));
9727         if (!ri_rec)
9728                 return -ENOMEM;
9729         ri_rec->bytenr = bytenr;
9730         ri_rec->objectid = objectid;
9731         ri_rec->level = level;
9732         ri_rec->level_size = level_size;
9733         ri_rec->drop_level = drop_level;
9734         ri_rec->last_snapshot = last_snapshot;
9735         if (drop_key)
9736                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9737         list_add_tail(&ri_rec->list, head);
9738
9739         return 0;
9740 }
9741
9742 static void free_root_item_list(struct list_head *list)
9743 {
9744         struct root_item_record *ri_rec;
9745
9746         while (!list_empty(list)) {
9747                 ri_rec = list_first_entry(list, struct root_item_record,
9748                                           list);
9749                 list_del_init(&ri_rec->list);
9750                 free(ri_rec);
9751         }
9752 }
9753
9754 static int deal_root_from_list(struct list_head *list,
9755                                struct btrfs_root *root,
9756                                struct block_info *bits,
9757                                int bits_nr,
9758                                struct cache_tree *pending,
9759                                struct cache_tree *seen,
9760                                struct cache_tree *reada,
9761                                struct cache_tree *nodes,
9762                                struct cache_tree *extent_cache,
9763                                struct cache_tree *chunk_cache,
9764                                struct rb_root *dev_cache,
9765                                struct block_group_tree *block_group_cache,
9766                                struct device_extent_tree *dev_extent_cache)
9767 {
9768         int ret = 0;
9769         u64 last;
9770
9771         while (!list_empty(list)) {
9772                 struct root_item_record *rec;
9773                 struct extent_buffer *buf;
9774                 rec = list_entry(list->next,
9775                                  struct root_item_record, list);
9776                 last = 0;
9777                 buf = read_tree_block(root->fs_info->tree_root,
9778                                       rec->bytenr, rec->level_size, 0);
9779                 if (!extent_buffer_uptodate(buf)) {
9780                         free_extent_buffer(buf);
9781                         ret = -EIO;
9782                         break;
9783                 }
9784                 ret = add_root_to_pending(buf, extent_cache, pending,
9785                                     seen, nodes, rec->objectid);
9786                 if (ret < 0)
9787                         break;
9788                 /*
9789                  * To rebuild extent tree, we need deal with snapshot
9790                  * one by one, otherwise we deal with node firstly which
9791                  * can maximize readahead.
9792                  */
9793                 while (1) {
9794                         ret = run_next_block(root, bits, bits_nr, &last,
9795                                              pending, seen, reada, nodes,
9796                                              extent_cache, chunk_cache,
9797                                              dev_cache, block_group_cache,
9798                                              dev_extent_cache, rec);
9799                         if (ret != 0)
9800                                 break;
9801                 }
9802                 free_extent_buffer(buf);
9803                 list_del(&rec->list);
9804                 free(rec);
9805                 if (ret < 0)
9806                         break;
9807         }
9808         while (ret >= 0) {
9809                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9810                                      reada, nodes, extent_cache, chunk_cache,
9811                                      dev_cache, block_group_cache,
9812                                      dev_extent_cache, NULL);
9813                 if (ret != 0) {
9814                         if (ret > 0)
9815                                 ret = 0;
9816                         break;
9817                 }
9818         }
9819         return ret;
9820 }
9821
9822 static int check_chunks_and_extents(struct btrfs_root *root)
9823 {
9824         struct rb_root dev_cache;
9825         struct cache_tree chunk_cache;
9826         struct block_group_tree block_group_cache;
9827         struct device_extent_tree dev_extent_cache;
9828         struct cache_tree extent_cache;
9829         struct cache_tree seen;
9830         struct cache_tree pending;
9831         struct cache_tree reada;
9832         struct cache_tree nodes;
9833         struct extent_io_tree excluded_extents;
9834         struct cache_tree corrupt_blocks;
9835         struct btrfs_path path;
9836         struct btrfs_key key;
9837         struct btrfs_key found_key;
9838         int ret, err = 0;
9839         struct block_info *bits;
9840         int bits_nr;
9841         struct extent_buffer *leaf;
9842         int slot;
9843         struct btrfs_root_item ri;
9844         struct list_head dropping_trees;
9845         struct list_head normal_trees;
9846         struct btrfs_root *root1;
9847         u64 objectid;
9848         u32 level_size;
9849         u8 level;
9850
9851         dev_cache = RB_ROOT;
9852         cache_tree_init(&chunk_cache);
9853         block_group_tree_init(&block_group_cache);
9854         device_extent_tree_init(&dev_extent_cache);
9855
9856         cache_tree_init(&extent_cache);
9857         cache_tree_init(&seen);
9858         cache_tree_init(&pending);
9859         cache_tree_init(&nodes);
9860         cache_tree_init(&reada);
9861         cache_tree_init(&corrupt_blocks);
9862         extent_io_tree_init(&excluded_extents);
9863         INIT_LIST_HEAD(&dropping_trees);
9864         INIT_LIST_HEAD(&normal_trees);
9865
9866         if (repair) {
9867                 root->fs_info->excluded_extents = &excluded_extents;
9868                 root->fs_info->fsck_extent_cache = &extent_cache;
9869                 root->fs_info->free_extent_hook = free_extent_hook;
9870                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9871         }
9872
9873         bits_nr = 1024;
9874         bits = malloc(bits_nr * sizeof(struct block_info));
9875         if (!bits) {
9876                 perror("malloc");
9877                 exit(1);
9878         }
9879
9880         if (ctx.progress_enabled) {
9881                 ctx.tp = TASK_EXTENTS;
9882                 task_start(ctx.info);
9883         }
9884
9885 again:
9886         root1 = root->fs_info->tree_root;
9887         level = btrfs_header_level(root1->node);
9888         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9889                                     root1->node->start, 0, level, 0,
9890                                     root1->nodesize, NULL);
9891         if (ret < 0)
9892                 goto out;
9893         root1 = root->fs_info->chunk_root;
9894         level = btrfs_header_level(root1->node);
9895         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9896                                     root1->node->start, 0, level, 0,
9897                                     root1->nodesize, NULL);
9898         if (ret < 0)
9899                 goto out;
9900         btrfs_init_path(&path);
9901         key.offset = 0;
9902         key.objectid = 0;
9903         key.type = BTRFS_ROOT_ITEM_KEY;
9904         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9905                                         &key, &path, 0, 0);
9906         if (ret < 0)
9907                 goto out;
9908         while(1) {
9909                 leaf = path.nodes[0];
9910                 slot = path.slots[0];
9911                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9912                         ret = btrfs_next_leaf(root, &path);
9913                         if (ret != 0)
9914                                 break;
9915                         leaf = path.nodes[0];
9916                         slot = path.slots[0];
9917                 }
9918                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9919                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9920                         unsigned long offset;
9921                         u64 last_snapshot;
9922
9923                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9924                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9925                         last_snapshot = btrfs_root_last_snapshot(&ri);
9926                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9927                                 level = btrfs_root_level(&ri);
9928                                 level_size = root->nodesize;
9929                                 ret = add_root_item_to_list(&normal_trees,
9930                                                 found_key.objectid,
9931                                                 btrfs_root_bytenr(&ri),
9932                                                 last_snapshot, level,
9933                                                 0, level_size, NULL);
9934                                 if (ret < 0)
9935                                         goto out;
9936                         } else {
9937                                 level = btrfs_root_level(&ri);
9938                                 level_size = root->nodesize;
9939                                 objectid = found_key.objectid;
9940                                 btrfs_disk_key_to_cpu(&found_key,
9941                                                       &ri.drop_progress);
9942                                 ret = add_root_item_to_list(&dropping_trees,
9943                                                 objectid,
9944                                                 btrfs_root_bytenr(&ri),
9945                                                 last_snapshot, level,
9946                                                 ri.drop_level,
9947                                                 level_size, &found_key);
9948                                 if (ret < 0)
9949                                         goto out;
9950                         }
9951                 }
9952                 path.slots[0]++;
9953         }
9954         btrfs_release_path(&path);
9955
9956         /*
9957          * check_block can return -EAGAIN if it fixes something, please keep
9958          * this in mind when dealing with return values from these functions, if
9959          * we get -EAGAIN we want to fall through and restart the loop.
9960          */
9961         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9962                                   &seen, &reada, &nodes, &extent_cache,
9963                                   &chunk_cache, &dev_cache, &block_group_cache,
9964                                   &dev_extent_cache);
9965         if (ret < 0) {
9966                 if (ret == -EAGAIN)
9967                         goto loop;
9968                 goto out;
9969         }
9970         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9971                                   &pending, &seen, &reada, &nodes,
9972                                   &extent_cache, &chunk_cache, &dev_cache,
9973                                   &block_group_cache, &dev_extent_cache);
9974         if (ret < 0) {
9975                 if (ret == -EAGAIN)
9976                         goto loop;
9977                 goto out;
9978         }
9979
9980         ret = check_chunks(&chunk_cache, &block_group_cache,
9981                            &dev_extent_cache, NULL, NULL, NULL, 0);
9982         if (ret) {
9983                 if (ret == -EAGAIN)
9984                         goto loop;
9985                 err = ret;
9986         }
9987
9988         ret = check_extent_refs(root, &extent_cache);
9989         if (ret < 0) {
9990                 if (ret == -EAGAIN)
9991                         goto loop;
9992                 goto out;
9993         }
9994
9995         ret = check_devices(&dev_cache, &dev_extent_cache);
9996         if (ret && err)
9997                 ret = err;
9998
9999 out:
10000         task_stop(ctx.info);
10001         if (repair) {
10002                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10003                 extent_io_tree_cleanup(&excluded_extents);
10004                 root->fs_info->fsck_extent_cache = NULL;
10005                 root->fs_info->free_extent_hook = NULL;
10006                 root->fs_info->corrupt_blocks = NULL;
10007                 root->fs_info->excluded_extents = NULL;
10008         }
10009         free(bits);
10010         free_chunk_cache_tree(&chunk_cache);
10011         free_device_cache_tree(&dev_cache);
10012         free_block_group_tree(&block_group_cache);
10013         free_device_extent_tree(&dev_extent_cache);
10014         free_extent_cache_tree(&seen);
10015         free_extent_cache_tree(&pending);
10016         free_extent_cache_tree(&reada);
10017         free_extent_cache_tree(&nodes);
10018         free_root_item_list(&normal_trees);
10019         free_root_item_list(&dropping_trees);
10020         return ret;
10021 loop:
10022         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10023         free_extent_cache_tree(&seen);
10024         free_extent_cache_tree(&pending);
10025         free_extent_cache_tree(&reada);
10026         free_extent_cache_tree(&nodes);
10027         free_chunk_cache_tree(&chunk_cache);
10028         free_block_group_tree(&block_group_cache);
10029         free_device_cache_tree(&dev_cache);
10030         free_device_extent_tree(&dev_extent_cache);
10031         free_extent_record_cache(&extent_cache);
10032         free_root_item_list(&normal_trees);
10033         free_root_item_list(&dropping_trees);
10034         extent_io_tree_cleanup(&excluded_extents);
10035         goto again;
10036 }
10037
10038 /*
10039  * Check backrefs of a tree block given by @bytenr or @eb.
10040  *
10041  * @root:       the root containing the @bytenr or @eb
10042  * @eb:         tree block extent buffer, can be NULL
10043  * @bytenr:     bytenr of the tree block to search
10044  * @level:      tree level of the tree block
10045  * @owner:      owner of the tree block
10046  *
10047  * Return >0 for any error found and output error message
10048  * Return 0 for no error found
10049  */
10050 static int check_tree_block_ref(struct btrfs_root *root,
10051                                 struct extent_buffer *eb, u64 bytenr,
10052                                 int level, u64 owner)
10053 {
10054         struct btrfs_key key;
10055         struct btrfs_root *extent_root = root->fs_info->extent_root;
10056         struct btrfs_path path;
10057         struct btrfs_extent_item *ei;
10058         struct btrfs_extent_inline_ref *iref;
10059         struct extent_buffer *leaf;
10060         unsigned long end;
10061         unsigned long ptr;
10062         int slot;
10063         int skinny_level;
10064         int type;
10065         u32 nodesize = root->nodesize;
10066         u32 item_size;
10067         u64 offset;
10068         int tree_reloc_root = 0;
10069         int found_ref = 0;
10070         int err = 0;
10071         int ret;
10072
10073         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10074             btrfs_header_bytenr(root->node) == bytenr)
10075                 tree_reloc_root = 1;
10076
10077         btrfs_init_path(&path);
10078         key.objectid = bytenr;
10079         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10080                 key.type = BTRFS_METADATA_ITEM_KEY;
10081         else
10082                 key.type = BTRFS_EXTENT_ITEM_KEY;
10083         key.offset = (u64)-1;
10084
10085         /* Search for the backref in extent tree */
10086         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10087         if (ret < 0) {
10088                 err |= BACKREF_MISSING;
10089                 goto out;
10090         }
10091         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10092         if (ret) {
10093                 err |= BACKREF_MISSING;
10094                 goto out;
10095         }
10096
10097         leaf = path.nodes[0];
10098         slot = path.slots[0];
10099         btrfs_item_key_to_cpu(leaf, &key, slot);
10100
10101         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10102
10103         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10104                 skinny_level = (int)key.offset;
10105                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10106         } else {
10107                 struct btrfs_tree_block_info *info;
10108
10109                 info = (struct btrfs_tree_block_info *)(ei + 1);
10110                 skinny_level = btrfs_tree_block_level(leaf, info);
10111                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10112         }
10113
10114         if (eb) {
10115                 u64 header_gen;
10116                 u64 extent_gen;
10117
10118                 if (!(btrfs_extent_flags(leaf, ei) &
10119                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10120                         error(
10121                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10122                                 key.objectid, nodesize,
10123                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10124                         err = BACKREF_MISMATCH;
10125                 }
10126                 header_gen = btrfs_header_generation(eb);
10127                 extent_gen = btrfs_extent_generation(leaf, ei);
10128                 if (header_gen != extent_gen) {
10129                         error(
10130         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10131                                 key.objectid, nodesize, header_gen,
10132                                 extent_gen);
10133                         err = BACKREF_MISMATCH;
10134                 }
10135                 if (level != skinny_level) {
10136                         error(
10137                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10138                                 key.objectid, nodesize, level, skinny_level);
10139                         err = BACKREF_MISMATCH;
10140                 }
10141                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10142                         error(
10143                         "extent[%llu %u] is referred by other roots than %llu",
10144                                 key.objectid, nodesize, root->objectid);
10145                         err = BACKREF_MISMATCH;
10146                 }
10147         }
10148
10149         /*
10150          * Iterate the extent/metadata item to find the exact backref
10151          */
10152         item_size = btrfs_item_size_nr(leaf, slot);
10153         ptr = (unsigned long)iref;
10154         end = (unsigned long)ei + item_size;
10155         while (ptr < end) {
10156                 iref = (struct btrfs_extent_inline_ref *)ptr;
10157                 type = btrfs_extent_inline_ref_type(leaf, iref);
10158                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10159
10160                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10161                         (offset == root->objectid || offset == owner)) {
10162                         found_ref = 1;
10163                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10164                         /*
10165                          * Backref of tree reloc root points to itself, no need
10166                          * to check backref any more.
10167                          */
10168                         if (tree_reloc_root)
10169                                 found_ref = 1;
10170                         else
10171                         /* Check if the backref points to valid referencer */
10172                                 found_ref = !check_tree_block_ref(root, NULL,
10173                                                 offset, level + 1, owner);
10174                 }
10175
10176                 if (found_ref)
10177                         break;
10178                 ptr += btrfs_extent_inline_ref_size(type);
10179         }
10180
10181         /*
10182          * Inlined extent item doesn't have what we need, check
10183          * TREE_BLOCK_REF_KEY
10184          */
10185         if (!found_ref) {
10186                 btrfs_release_path(&path);
10187                 key.objectid = bytenr;
10188                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10189                 key.offset = root->objectid;
10190
10191                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10192                 if (!ret)
10193                         found_ref = 1;
10194         }
10195         if (!found_ref)
10196                 err |= BACKREF_MISSING;
10197 out:
10198         btrfs_release_path(&path);
10199         if (eb && (err & BACKREF_MISSING))
10200                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10201                         bytenr, nodesize, owner, level);
10202         return err;
10203 }
10204
10205 /*
10206  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10207  *
10208  * Return >0 any error found and output error message
10209  * Return 0 for no error found
10210  */
10211 static int check_extent_data_item(struct btrfs_root *root,
10212                                   struct extent_buffer *eb, int slot)
10213 {
10214         struct btrfs_file_extent_item *fi;
10215         struct btrfs_path path;
10216         struct btrfs_root *extent_root = root->fs_info->extent_root;
10217         struct btrfs_key fi_key;
10218         struct btrfs_key dbref_key;
10219         struct extent_buffer *leaf;
10220         struct btrfs_extent_item *ei;
10221         struct btrfs_extent_inline_ref *iref;
10222         struct btrfs_extent_data_ref *dref;
10223         u64 owner;
10224         u64 disk_bytenr;
10225         u64 disk_num_bytes;
10226         u64 extent_num_bytes;
10227         u64 extent_flags;
10228         u32 item_size;
10229         unsigned long end;
10230         unsigned long ptr;
10231         int type;
10232         u64 ref_root;
10233         int found_dbackref = 0;
10234         int err = 0;
10235         int ret;
10236
10237         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10238         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10239
10240         /* Nothing to check for hole and inline data extents */
10241         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10242             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10243                 return 0;
10244
10245         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10246         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10247         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10248
10249         /* Check unaligned disk_num_bytes and num_bytes */
10250         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10251                 error(
10252 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10253                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10254                         root->sectorsize);
10255                 err |= BYTES_UNALIGNED;
10256         } else {
10257                 data_bytes_allocated += disk_num_bytes;
10258         }
10259         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10260                 error(
10261 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10262                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10263                         root->sectorsize);
10264                 err |= BYTES_UNALIGNED;
10265         } else {
10266                 data_bytes_referenced += extent_num_bytes;
10267         }
10268         owner = btrfs_header_owner(eb);
10269
10270         /* Check the extent item of the file extent in extent tree */
10271         btrfs_init_path(&path);
10272         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10273         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10274         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10275
10276         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10277         if (ret)
10278                 goto out;
10279
10280         leaf = path.nodes[0];
10281         slot = path.slots[0];
10282         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10283
10284         extent_flags = btrfs_extent_flags(leaf, ei);
10285
10286         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10287                 error(
10288                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10289                     disk_bytenr, disk_num_bytes,
10290                     BTRFS_EXTENT_FLAG_DATA);
10291                 err |= BACKREF_MISMATCH;
10292         }
10293
10294         /* Check data backref inside that extent item */
10295         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10296         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10297         ptr = (unsigned long)iref;
10298         end = (unsigned long)ei + item_size;
10299         while (ptr < end) {
10300                 iref = (struct btrfs_extent_inline_ref *)ptr;
10301                 type = btrfs_extent_inline_ref_type(leaf, iref);
10302                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10303
10304                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10305                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10306                         if (ref_root == owner || ref_root == root->objectid)
10307                                 found_dbackref = 1;
10308                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10309                         found_dbackref = !check_tree_block_ref(root, NULL,
10310                                 btrfs_extent_inline_ref_offset(leaf, iref),
10311                                 0, owner);
10312                 }
10313
10314                 if (found_dbackref)
10315                         break;
10316                 ptr += btrfs_extent_inline_ref_size(type);
10317         }
10318
10319         if (!found_dbackref) {
10320                 btrfs_release_path(&path);
10321
10322                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10323                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10324                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10325                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10326                                 fi_key.objectid, fi_key.offset);
10327
10328                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10329                                         &dbref_key, &path, 0, 0);
10330                 if (!ret) {
10331                         found_dbackref = 1;
10332                         goto out;
10333                 }
10334
10335                 btrfs_release_path(&path);
10336
10337                 /*
10338                  * Neither inlined nor EXTENT_DATA_REF found, try
10339                  * SHARED_DATA_REF as last chance.
10340                  */
10341                 dbref_key.objectid = disk_bytenr;
10342                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10343                 dbref_key.offset = eb->start;
10344
10345                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10346                                         &dbref_key, &path, 0, 0);
10347                 if (!ret) {
10348                         found_dbackref = 1;
10349                         goto out;
10350                 }
10351         }
10352
10353 out:
10354         if (!found_dbackref)
10355                 err |= BACKREF_MISSING;
10356         btrfs_release_path(&path);
10357         if (err & BACKREF_MISSING) {
10358                 error("data extent[%llu %llu] backref lost",
10359                       disk_bytenr, disk_num_bytes);
10360         }
10361         return err;
10362 }
10363
10364 /*
10365  * Get real tree block level for the case like shared block
10366  * Return >= 0 as tree level
10367  * Return <0 for error
10368  */
10369 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10370 {
10371         struct extent_buffer *eb;
10372         struct btrfs_path path;
10373         struct btrfs_key key;
10374         struct btrfs_extent_item *ei;
10375         u64 flags;
10376         u64 transid;
10377         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10378         u8 backref_level;
10379         u8 header_level;
10380         int ret;
10381
10382         /* Search extent tree for extent generation and level */
10383         key.objectid = bytenr;
10384         key.type = BTRFS_METADATA_ITEM_KEY;
10385         key.offset = (u64)-1;
10386
10387         btrfs_init_path(&path);
10388         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10389         if (ret < 0)
10390                 goto release_out;
10391         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10392         if (ret < 0)
10393                 goto release_out;
10394         if (ret > 0) {
10395                 ret = -ENOENT;
10396                 goto release_out;
10397         }
10398
10399         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10400         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10401                             struct btrfs_extent_item);
10402         flags = btrfs_extent_flags(path.nodes[0], ei);
10403         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10404                 ret = -ENOENT;
10405                 goto release_out;
10406         }
10407
10408         /* Get transid for later read_tree_block() check */
10409         transid = btrfs_extent_generation(path.nodes[0], ei);
10410
10411         /* Get backref level as one source */
10412         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10413                 backref_level = key.offset;
10414         } else {
10415                 struct btrfs_tree_block_info *info;
10416
10417                 info = (struct btrfs_tree_block_info *)(ei + 1);
10418                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10419         }
10420         btrfs_release_path(&path);
10421
10422         /* Get level from tree block as an alternative source */
10423         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10424         if (!extent_buffer_uptodate(eb)) {
10425                 free_extent_buffer(eb);
10426                 return -EIO;
10427         }
10428         header_level = btrfs_header_level(eb);
10429         free_extent_buffer(eb);
10430
10431         if (header_level != backref_level)
10432                 return -EIO;
10433         return header_level;
10434
10435 release_out:
10436         btrfs_release_path(&path);
10437         return ret;
10438 }
10439
10440 /*
10441  * Check if a tree block backref is valid (points to a valid tree block)
10442  * if level == -1, level will be resolved
10443  * Return >0 for any error found and print error message
10444  */
10445 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10446                                     u64 bytenr, int level)
10447 {
10448         struct btrfs_root *root;
10449         struct btrfs_key key;
10450         struct btrfs_path path;
10451         struct extent_buffer *eb;
10452         struct extent_buffer *node;
10453         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10454         int err = 0;
10455         int ret;
10456
10457         /* Query level for level == -1 special case */
10458         if (level == -1)
10459                 level = query_tree_block_level(fs_info, bytenr);
10460         if (level < 0) {
10461                 err |= REFERENCER_MISSING;
10462                 goto out;
10463         }
10464
10465         key.objectid = root_id;
10466         key.type = BTRFS_ROOT_ITEM_KEY;
10467         key.offset = (u64)-1;
10468
10469         root = btrfs_read_fs_root(fs_info, &key);
10470         if (IS_ERR(root)) {
10471                 err |= REFERENCER_MISSING;
10472                 goto out;
10473         }
10474
10475         /* Read out the tree block to get item/node key */
10476         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10477         if (!extent_buffer_uptodate(eb)) {
10478                 err |= REFERENCER_MISSING;
10479                 free_extent_buffer(eb);
10480                 goto out;
10481         }
10482
10483         /* Empty tree, no need to check key */
10484         if (!btrfs_header_nritems(eb) && !level) {
10485                 free_extent_buffer(eb);
10486                 goto out;
10487         }
10488
10489         if (level)
10490                 btrfs_node_key_to_cpu(eb, &key, 0);
10491         else
10492                 btrfs_item_key_to_cpu(eb, &key, 0);
10493
10494         free_extent_buffer(eb);
10495
10496         btrfs_init_path(&path);
10497         path.lowest_level = level;
10498         /* Search with the first key, to ensure we can reach it */
10499         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10500         if (ret < 0) {
10501                 err |= REFERENCER_MISSING;
10502                 goto release_out;
10503         }
10504
10505         node = path.nodes[level];
10506         if (btrfs_header_bytenr(node) != bytenr) {
10507                 error(
10508         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10509                         bytenr, nodesize, bytenr,
10510                         btrfs_header_bytenr(node));
10511                 err |= REFERENCER_MISMATCH;
10512         }
10513         if (btrfs_header_level(node) != level) {
10514                 error(
10515         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10516                         bytenr, nodesize, level,
10517                         btrfs_header_level(node));
10518                 err |= REFERENCER_MISMATCH;
10519         }
10520
10521 release_out:
10522         btrfs_release_path(&path);
10523 out:
10524         if (err & REFERENCER_MISSING) {
10525                 if (level < 0)
10526                         error("extent [%llu %d] lost referencer (owner: %llu)",
10527                                 bytenr, nodesize, root_id);
10528                 else
10529                         error(
10530                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10531                                 bytenr, nodesize, root_id, level);
10532         }
10533
10534         return err;
10535 }
10536
10537 /*
10538  * Check if tree block @eb is tree reloc root.
10539  * Return 0 if it's not or any problem happens
10540  * Return 1 if it's a tree reloc root
10541  */
10542 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10543                                  struct extent_buffer *eb)
10544 {
10545         struct btrfs_root *tree_reloc_root;
10546         struct btrfs_key key;
10547         u64 bytenr = btrfs_header_bytenr(eb);
10548         u64 owner = btrfs_header_owner(eb);
10549         int ret = 0;
10550
10551         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10552         key.offset = owner;
10553         key.type = BTRFS_ROOT_ITEM_KEY;
10554
10555         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10556         if (IS_ERR(tree_reloc_root))
10557                 return 0;
10558
10559         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10560                 ret = 1;
10561         btrfs_free_fs_root(tree_reloc_root);
10562         return ret;
10563 }
10564
10565 /*
10566  * Check referencer for shared block backref
10567  * If level == -1, this function will resolve the level.
10568  */
10569 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10570                                      u64 parent, u64 bytenr, int level)
10571 {
10572         struct extent_buffer *eb;
10573         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10574         u32 nr;
10575         int found_parent = 0;
10576         int i;
10577
10578         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10579         if (!extent_buffer_uptodate(eb))
10580                 goto out;
10581
10582         if (level == -1)
10583                 level = query_tree_block_level(fs_info, bytenr);
10584         if (level < 0)
10585                 goto out;
10586
10587         /* It's possible it's a tree reloc root */
10588         if (parent == bytenr) {
10589                 if (is_tree_reloc_root(fs_info, eb))
10590                         found_parent = 1;
10591                 goto out;
10592         }
10593
10594         if (level + 1 != btrfs_header_level(eb))
10595                 goto out;
10596
10597         nr = btrfs_header_nritems(eb);
10598         for (i = 0; i < nr; i++) {
10599                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10600                         found_parent = 1;
10601                         break;
10602                 }
10603         }
10604 out:
10605         free_extent_buffer(eb);
10606         if (!found_parent) {
10607                 error(
10608         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10609                         bytenr, nodesize, parent, level);
10610                 return REFERENCER_MISSING;
10611         }
10612         return 0;
10613 }
10614
10615 /*
10616  * Check referencer for normal (inlined) data ref
10617  * If len == 0, it will be resolved by searching in extent tree
10618  */
10619 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10620                                      u64 root_id, u64 objectid, u64 offset,
10621                                      u64 bytenr, u64 len, u32 count)
10622 {
10623         struct btrfs_root *root;
10624         struct btrfs_root *extent_root = fs_info->extent_root;
10625         struct btrfs_key key;
10626         struct btrfs_path path;
10627         struct extent_buffer *leaf;
10628         struct btrfs_file_extent_item *fi;
10629         u32 found_count = 0;
10630         int slot;
10631         int ret = 0;
10632
10633         if (!len) {
10634                 key.objectid = bytenr;
10635                 key.type = BTRFS_EXTENT_ITEM_KEY;
10636                 key.offset = (u64)-1;
10637
10638                 btrfs_init_path(&path);
10639                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10640                 if (ret < 0)
10641                         goto out;
10642                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10643                 if (ret)
10644                         goto out;
10645                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10646                 if (key.objectid != bytenr ||
10647                     key.type != BTRFS_EXTENT_ITEM_KEY)
10648                         goto out;
10649                 len = key.offset;
10650                 btrfs_release_path(&path);
10651         }
10652         key.objectid = root_id;
10653         key.type = BTRFS_ROOT_ITEM_KEY;
10654         key.offset = (u64)-1;
10655         btrfs_init_path(&path);
10656
10657         root = btrfs_read_fs_root(fs_info, &key);
10658         if (IS_ERR(root))
10659                 goto out;
10660
10661         key.objectid = objectid;
10662         key.type = BTRFS_EXTENT_DATA_KEY;
10663         /*
10664          * It can be nasty as data backref offset is
10665          * file offset - file extent offset, which is smaller or
10666          * equal to original backref offset.  The only special case is
10667          * overflow.  So we need to special check and do further search.
10668          */
10669         key.offset = offset & (1ULL << 63) ? 0 : offset;
10670
10671         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10672         if (ret < 0)
10673                 goto out;
10674
10675         /*
10676          * Search afterwards to get correct one
10677          * NOTE: As we must do a comprehensive check on the data backref to
10678          * make sure the dref count also matches, we must iterate all file
10679          * extents for that inode.
10680          */
10681         while (1) {
10682                 leaf = path.nodes[0];
10683                 slot = path.slots[0];
10684
10685                 if (slot >= btrfs_header_nritems(leaf))
10686                         goto next;
10687                 btrfs_item_key_to_cpu(leaf, &key, slot);
10688                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10689                         break;
10690                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10691                 /*
10692                  * Except normal disk bytenr and disk num bytes, we still
10693                  * need to do extra check on dbackref offset as
10694                  * dbackref offset = file_offset - file_extent_offset
10695                  */
10696                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10697                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10698                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10699                     offset)
10700                         found_count++;
10701
10702 next:
10703                 ret = btrfs_next_item(root, &path);
10704                 if (ret)
10705                         break;
10706         }
10707 out:
10708         btrfs_release_path(&path);
10709         if (found_count != count) {
10710                 error(
10711 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10712                         bytenr, len, root_id, objectid, offset, count, found_count);
10713                 return REFERENCER_MISSING;
10714         }
10715         return 0;
10716 }
10717
10718 /*
10719  * Check if the referencer of a shared data backref exists
10720  */
10721 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10722                                      u64 parent, u64 bytenr)
10723 {
10724         struct extent_buffer *eb;
10725         struct btrfs_key key;
10726         struct btrfs_file_extent_item *fi;
10727         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10728         u32 nr;
10729         int found_parent = 0;
10730         int i;
10731
10732         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10733         if (!extent_buffer_uptodate(eb))
10734                 goto out;
10735
10736         nr = btrfs_header_nritems(eb);
10737         for (i = 0; i < nr; i++) {
10738                 btrfs_item_key_to_cpu(eb, &key, i);
10739                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10740                         continue;
10741
10742                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10743                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10744                         continue;
10745
10746                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10747                         found_parent = 1;
10748                         break;
10749                 }
10750         }
10751
10752 out:
10753         free_extent_buffer(eb);
10754         if (!found_parent) {
10755                 error("shared extent %llu referencer lost (parent: %llu)",
10756                         bytenr, parent);
10757                 return REFERENCER_MISSING;
10758         }
10759         return 0;
10760 }
10761
10762 /*
10763  * This function will check a given extent item, including its backref and
10764  * itself (like crossing stripe boundary and type)
10765  *
10766  * Since we don't use extent_record anymore, introduce new error bit
10767  */
10768 static int check_extent_item(struct btrfs_fs_info *fs_info,
10769                              struct extent_buffer *eb, int slot)
10770 {
10771         struct btrfs_extent_item *ei;
10772         struct btrfs_extent_inline_ref *iref;
10773         struct btrfs_extent_data_ref *dref;
10774         unsigned long end;
10775         unsigned long ptr;
10776         int type;
10777         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10778         u32 item_size = btrfs_item_size_nr(eb, slot);
10779         u64 flags;
10780         u64 offset;
10781         int metadata = 0;
10782         int level;
10783         struct btrfs_key key;
10784         int ret;
10785         int err = 0;
10786
10787         btrfs_item_key_to_cpu(eb, &key, slot);
10788         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10789                 bytes_used += key.offset;
10790         else
10791                 bytes_used += nodesize;
10792
10793         if (item_size < sizeof(*ei)) {
10794                 /*
10795                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10796                  * old thing when on disk format is still un-determined.
10797                  * No need to care about it anymore
10798                  */
10799                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10800                 return -ENOTTY;
10801         }
10802
10803         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10804         flags = btrfs_extent_flags(eb, ei);
10805
10806         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10807                 metadata = 1;
10808         if (metadata && check_crossing_stripes(global_info, key.objectid,
10809                                                eb->len)) {
10810                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10811                       key.objectid, key.objectid + nodesize);
10812                 err |= CROSSING_STRIPE_BOUNDARY;
10813         }
10814
10815         ptr = (unsigned long)(ei + 1);
10816
10817         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10818                 /* Old EXTENT_ITEM metadata */
10819                 struct btrfs_tree_block_info *info;
10820
10821                 info = (struct btrfs_tree_block_info *)ptr;
10822                 level = btrfs_tree_block_level(eb, info);
10823                 ptr += sizeof(struct btrfs_tree_block_info);
10824         } else {
10825                 /* New METADATA_ITEM */
10826                 level = key.offset;
10827         }
10828         end = (unsigned long)ei + item_size;
10829
10830 next:
10831         /* Reached extent item end normally */
10832         if (ptr == end)
10833                 goto out;
10834
10835         /* Beyond extent item end, wrong item size */
10836         if (ptr > end) {
10837                 err |= ITEM_SIZE_MISMATCH;
10838                 error("extent item at bytenr %llu slot %d has wrong size",
10839                         eb->start, slot);
10840                 goto out;
10841         }
10842
10843         /* Now check every backref in this extent item */
10844         iref = (struct btrfs_extent_inline_ref *)ptr;
10845         type = btrfs_extent_inline_ref_type(eb, iref);
10846         offset = btrfs_extent_inline_ref_offset(eb, iref);
10847         switch (type) {
10848         case BTRFS_TREE_BLOCK_REF_KEY:
10849                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10850                                                level);
10851                 err |= ret;
10852                 break;
10853         case BTRFS_SHARED_BLOCK_REF_KEY:
10854                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10855                                                  level);
10856                 err |= ret;
10857                 break;
10858         case BTRFS_EXTENT_DATA_REF_KEY:
10859                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10860                 ret = check_extent_data_backref(fs_info,
10861                                 btrfs_extent_data_ref_root(eb, dref),
10862                                 btrfs_extent_data_ref_objectid(eb, dref),
10863                                 btrfs_extent_data_ref_offset(eb, dref),
10864                                 key.objectid, key.offset,
10865                                 btrfs_extent_data_ref_count(eb, dref));
10866                 err |= ret;
10867                 break;
10868         case BTRFS_SHARED_DATA_REF_KEY:
10869                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10870                 err |= ret;
10871                 break;
10872         default:
10873                 error("extent[%llu %d %llu] has unknown ref type: %d",
10874                         key.objectid, key.type, key.offset, type);
10875                 err |= UNKNOWN_TYPE;
10876                 goto out;
10877         }
10878
10879         ptr += btrfs_extent_inline_ref_size(type);
10880         goto next;
10881
10882 out:
10883         return err;
10884 }
10885
10886 /*
10887  * Check if a dev extent item is referred correctly by its chunk
10888  */
10889 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10890                                  struct extent_buffer *eb, int slot)
10891 {
10892         struct btrfs_root *chunk_root = fs_info->chunk_root;
10893         struct btrfs_dev_extent *ptr;
10894         struct btrfs_path path;
10895         struct btrfs_key chunk_key;
10896         struct btrfs_key devext_key;
10897         struct btrfs_chunk *chunk;
10898         struct extent_buffer *l;
10899         int num_stripes;
10900         u64 length;
10901         int i;
10902         int found_chunk = 0;
10903         int ret;
10904
10905         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10906         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10907         length = btrfs_dev_extent_length(eb, ptr);
10908
10909         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10910         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10911         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10912
10913         btrfs_init_path(&path);
10914         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10915         if (ret)
10916                 goto out;
10917
10918         l = path.nodes[0];
10919         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10920         if (btrfs_chunk_length(l, chunk) != length)
10921                 goto out;
10922
10923         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10924         for (i = 0; i < num_stripes; i++) {
10925                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10926                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10927
10928                 if (devid == devext_key.objectid &&
10929                     offset == devext_key.offset) {
10930                         found_chunk = 1;
10931                         break;
10932                 }
10933         }
10934 out:
10935         btrfs_release_path(&path);
10936         if (!found_chunk) {
10937                 error(
10938                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10939                         devext_key.objectid, devext_key.offset, length);
10940                 return REFERENCER_MISSING;
10941         }
10942         return 0;
10943 }
10944
10945 /*
10946  * Check if the used space is correct with the dev item
10947  */
10948 static int check_dev_item(struct btrfs_fs_info *fs_info,
10949                           struct extent_buffer *eb, int slot)
10950 {
10951         struct btrfs_root *dev_root = fs_info->dev_root;
10952         struct btrfs_dev_item *dev_item;
10953         struct btrfs_path path;
10954         struct btrfs_key key;
10955         struct btrfs_dev_extent *ptr;
10956         u64 dev_id;
10957         u64 used;
10958         u64 total = 0;
10959         int ret;
10960
10961         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10962         dev_id = btrfs_device_id(eb, dev_item);
10963         used = btrfs_device_bytes_used(eb, dev_item);
10964
10965         key.objectid = dev_id;
10966         key.type = BTRFS_DEV_EXTENT_KEY;
10967         key.offset = 0;
10968
10969         btrfs_init_path(&path);
10970         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10971         if (ret < 0) {
10972                 btrfs_item_key_to_cpu(eb, &key, slot);
10973                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10974                         key.objectid, key.type, key.offset);
10975                 btrfs_release_path(&path);
10976                 return REFERENCER_MISSING;
10977         }
10978
10979         /* Iterate dev_extents to calculate the used space of a device */
10980         while (1) {
10981                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10982                         goto next;
10983
10984                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10985                 if (key.objectid > dev_id)
10986                         break;
10987                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10988                         goto next;
10989
10990                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10991                                      struct btrfs_dev_extent);
10992                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10993 next:
10994                 ret = btrfs_next_item(dev_root, &path);
10995                 if (ret)
10996                         break;
10997         }
10998         btrfs_release_path(&path);
10999
11000         if (used != total) {
11001                 btrfs_item_key_to_cpu(eb, &key, slot);
11002                 error(
11003 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11004                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11005                         BTRFS_DEV_EXTENT_KEY, dev_id);
11006                 return ACCOUNTING_MISMATCH;
11007         }
11008         return 0;
11009 }
11010
11011 /*
11012  * Check a block group item with its referener (chunk) and its used space
11013  * with extent/metadata item
11014  */
11015 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11016                                   struct extent_buffer *eb, int slot)
11017 {
11018         struct btrfs_root *extent_root = fs_info->extent_root;
11019         struct btrfs_root *chunk_root = fs_info->chunk_root;
11020         struct btrfs_block_group_item *bi;
11021         struct btrfs_block_group_item bg_item;
11022         struct btrfs_path path;
11023         struct btrfs_key bg_key;
11024         struct btrfs_key chunk_key;
11025         struct btrfs_key extent_key;
11026         struct btrfs_chunk *chunk;
11027         struct extent_buffer *leaf;
11028         struct btrfs_extent_item *ei;
11029         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11030         u64 flags;
11031         u64 bg_flags;
11032         u64 used;
11033         u64 total = 0;
11034         int ret;
11035         int err = 0;
11036
11037         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11038         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11039         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11040         used = btrfs_block_group_used(&bg_item);
11041         bg_flags = btrfs_block_group_flags(&bg_item);
11042
11043         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11044         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11045         chunk_key.offset = bg_key.objectid;
11046
11047         btrfs_init_path(&path);
11048         /* Search for the referencer chunk */
11049         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11050         if (ret) {
11051                 error(
11052                 "block group[%llu %llu] did not find the related chunk item",
11053                         bg_key.objectid, bg_key.offset);
11054                 err |= REFERENCER_MISSING;
11055         } else {
11056                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11057                                         struct btrfs_chunk);
11058                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11059                                                 bg_key.offset) {
11060                         error(
11061         "block group[%llu %llu] related chunk item length does not match",
11062                                 bg_key.objectid, bg_key.offset);
11063                         err |= REFERENCER_MISMATCH;
11064                 }
11065         }
11066         btrfs_release_path(&path);
11067
11068         /* Search from the block group bytenr */
11069         extent_key.objectid = bg_key.objectid;
11070         extent_key.type = 0;
11071         extent_key.offset = 0;
11072
11073         btrfs_init_path(&path);
11074         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11075         if (ret < 0)
11076                 goto out;
11077
11078         /* Iterate extent tree to account used space */
11079         while (1) {
11080                 leaf = path.nodes[0];
11081
11082                 /* Search slot can point to the last item beyond leaf nritems */
11083                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11084                         goto next;
11085
11086                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11087                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11088                         break;
11089
11090                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11091                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11092                         goto next;
11093                 if (extent_key.objectid < bg_key.objectid)
11094                         goto next;
11095
11096                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11097                         total += nodesize;
11098                 else
11099                         total += extent_key.offset;
11100
11101                 ei = btrfs_item_ptr(leaf, path.slots[0],
11102                                     struct btrfs_extent_item);
11103                 flags = btrfs_extent_flags(leaf, ei);
11104                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11105                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11106                                 error(
11107                         "bad extent[%llu, %llu) type mismatch with chunk",
11108                                         extent_key.objectid,
11109                                         extent_key.objectid + extent_key.offset);
11110                                 err |= CHUNK_TYPE_MISMATCH;
11111                         }
11112                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11113                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11114                                     BTRFS_BLOCK_GROUP_METADATA))) {
11115                                 error(
11116                         "bad extent[%llu, %llu) type mismatch with chunk",
11117                                         extent_key.objectid,
11118                                         extent_key.objectid + nodesize);
11119                                 err |= CHUNK_TYPE_MISMATCH;
11120                         }
11121                 }
11122 next:
11123                 ret = btrfs_next_item(extent_root, &path);
11124                 if (ret)
11125                         break;
11126         }
11127
11128 out:
11129         btrfs_release_path(&path);
11130
11131         if (total != used) {
11132                 error(
11133                 "block group[%llu %llu] used %llu but extent items used %llu",
11134                         bg_key.objectid, bg_key.offset, used, total);
11135                 err |= ACCOUNTING_MISMATCH;
11136         }
11137         return err;
11138 }
11139
11140 /*
11141  * Check a chunk item.
11142  * Including checking all referred dev_extents and block group
11143  */
11144 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11145                             struct extent_buffer *eb, int slot)
11146 {
11147         struct btrfs_root *extent_root = fs_info->extent_root;
11148         struct btrfs_root *dev_root = fs_info->dev_root;
11149         struct btrfs_path path;
11150         struct btrfs_key chunk_key;
11151         struct btrfs_key bg_key;
11152         struct btrfs_key devext_key;
11153         struct btrfs_chunk *chunk;
11154         struct extent_buffer *leaf;
11155         struct btrfs_block_group_item *bi;
11156         struct btrfs_block_group_item bg_item;
11157         struct btrfs_dev_extent *ptr;
11158         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11159         u64 length;
11160         u64 chunk_end;
11161         u64 type;
11162         u64 profile;
11163         int num_stripes;
11164         u64 offset;
11165         u64 objectid;
11166         int i;
11167         int ret;
11168         int err = 0;
11169
11170         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11171         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11172         length = btrfs_chunk_length(eb, chunk);
11173         chunk_end = chunk_key.offset + length;
11174         if (!IS_ALIGNED(length, sectorsize)) {
11175                 error("chunk[%llu %llu) not aligned to %u",
11176                         chunk_key.offset, chunk_end, sectorsize);
11177                 err |= BYTES_UNALIGNED;
11178                 goto out;
11179         }
11180
11181         type = btrfs_chunk_type(eb, chunk);
11182         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11183         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11184                 error("chunk[%llu %llu) has no chunk type",
11185                         chunk_key.offset, chunk_end);
11186                 err |= UNKNOWN_TYPE;
11187         }
11188         if (profile && (profile & (profile - 1))) {
11189                 error("chunk[%llu %llu) multiple profiles detected: %llx",
11190                         chunk_key.offset, chunk_end, profile);
11191                 err |= UNKNOWN_TYPE;
11192         }
11193
11194         bg_key.objectid = chunk_key.offset;
11195         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11196         bg_key.offset = length;
11197
11198         btrfs_init_path(&path);
11199         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11200         if (ret) {
11201                 error(
11202                 "chunk[%llu %llu) did not find the related block group item",
11203                         chunk_key.offset, chunk_end);
11204                 err |= REFERENCER_MISSING;
11205         } else{
11206                 leaf = path.nodes[0];
11207                 bi = btrfs_item_ptr(leaf, path.slots[0],
11208                                     struct btrfs_block_group_item);
11209                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11210                                    sizeof(bg_item));
11211                 if (btrfs_block_group_flags(&bg_item) != type) {
11212                         error(
11213 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11214                                 chunk_key.offset, chunk_end, type,
11215                                 btrfs_block_group_flags(&bg_item));
11216                         err |= REFERENCER_MISSING;
11217                 }
11218         }
11219
11220         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11221         for (i = 0; i < num_stripes; i++) {
11222                 btrfs_release_path(&path);
11223                 btrfs_init_path(&path);
11224                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11225                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11226                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11227
11228                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11229                                         0, 0);
11230                 if (ret)
11231                         goto not_match_dev;
11232
11233                 leaf = path.nodes[0];
11234                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11235                                      struct btrfs_dev_extent);
11236                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11237                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11238                 if (objectid != chunk_key.objectid ||
11239                     offset != chunk_key.offset ||
11240                     btrfs_dev_extent_length(leaf, ptr) != length)
11241                         goto not_match_dev;
11242                 continue;
11243 not_match_dev:
11244                 err |= BACKREF_MISSING;
11245                 error(
11246                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11247                         chunk_key.objectid, chunk_end, i);
11248                 continue;
11249         }
11250         btrfs_release_path(&path);
11251 out:
11252         return err;
11253 }
11254
11255 /*
11256  * Main entry function to check known items and update related accounting info
11257  */
11258 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11259 {
11260         struct btrfs_fs_info *fs_info = root->fs_info;
11261         struct btrfs_key key;
11262         int slot = 0;
11263         int type;
11264         struct btrfs_extent_data_ref *dref;
11265         int ret;
11266         int err = 0;
11267
11268 next:
11269         btrfs_item_key_to_cpu(eb, &key, slot);
11270         type = key.type;
11271
11272         switch (type) {
11273         case BTRFS_EXTENT_DATA_KEY:
11274                 ret = check_extent_data_item(root, eb, slot);
11275                 err |= ret;
11276                 break;
11277         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11278                 ret = check_block_group_item(fs_info, eb, slot);
11279                 err |= ret;
11280                 break;
11281         case BTRFS_DEV_ITEM_KEY:
11282                 ret = check_dev_item(fs_info, eb, slot);
11283                 err |= ret;
11284                 break;
11285         case BTRFS_CHUNK_ITEM_KEY:
11286                 ret = check_chunk_item(fs_info, eb, slot);
11287                 err |= ret;
11288                 break;
11289         case BTRFS_DEV_EXTENT_KEY:
11290                 ret = check_dev_extent_item(fs_info, eb, slot);
11291                 err |= ret;
11292                 break;
11293         case BTRFS_EXTENT_ITEM_KEY:
11294         case BTRFS_METADATA_ITEM_KEY:
11295                 ret = check_extent_item(fs_info, eb, slot);
11296                 err |= ret;
11297                 break;
11298         case BTRFS_EXTENT_CSUM_KEY:
11299                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11300                 break;
11301         case BTRFS_TREE_BLOCK_REF_KEY:
11302                 ret = check_tree_block_backref(fs_info, key.offset,
11303                                                key.objectid, -1);
11304                 err |= ret;
11305                 break;
11306         case BTRFS_EXTENT_DATA_REF_KEY:
11307                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11308                 ret = check_extent_data_backref(fs_info,
11309                                 btrfs_extent_data_ref_root(eb, dref),
11310                                 btrfs_extent_data_ref_objectid(eb, dref),
11311                                 btrfs_extent_data_ref_offset(eb, dref),
11312                                 key.objectid, 0,
11313                                 btrfs_extent_data_ref_count(eb, dref));
11314                 err |= ret;
11315                 break;
11316         case BTRFS_SHARED_BLOCK_REF_KEY:
11317                 ret = check_shared_block_backref(fs_info, key.offset,
11318                                                  key.objectid, -1);
11319                 err |= ret;
11320                 break;
11321         case BTRFS_SHARED_DATA_REF_KEY:
11322                 ret = check_shared_data_backref(fs_info, key.offset,
11323                                                 key.objectid);
11324                 err |= ret;
11325                 break;
11326         default:
11327                 break;
11328         }
11329
11330         if (++slot < btrfs_header_nritems(eb))
11331                 goto next;
11332
11333         return err;
11334 }
11335
11336 /*
11337  * Helper function for later fs/subvol tree check.  To determine if a tree
11338  * block should be checked.
11339  * This function will ensure only the direct referencer with lowest rootid to
11340  * check a fs/subvolume tree block.
11341  *
11342  * Backref check at extent tree would detect errors like missing subvolume
11343  * tree, so we can do aggressive check to reduce duplicated checks.
11344  */
11345 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11346 {
11347         struct btrfs_root *extent_root = root->fs_info->extent_root;
11348         struct btrfs_key key;
11349         struct btrfs_path path;
11350         struct extent_buffer *leaf;
11351         int slot;
11352         struct btrfs_extent_item *ei;
11353         unsigned long ptr;
11354         unsigned long end;
11355         int type;
11356         u32 item_size;
11357         u64 offset;
11358         struct btrfs_extent_inline_ref *iref;
11359         int ret;
11360
11361         btrfs_init_path(&path);
11362         key.objectid = btrfs_header_bytenr(eb);
11363         key.type = BTRFS_METADATA_ITEM_KEY;
11364         key.offset = (u64)-1;
11365
11366         /*
11367          * Any failure in backref resolving means we can't determine
11368          * whom the tree block belongs to.
11369          * So in that case, we need to check that tree block
11370          */
11371         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11372         if (ret < 0)
11373                 goto need_check;
11374
11375         ret = btrfs_previous_extent_item(extent_root, &path,
11376                                          btrfs_header_bytenr(eb));
11377         if (ret)
11378                 goto need_check;
11379
11380         leaf = path.nodes[0];
11381         slot = path.slots[0];
11382         btrfs_item_key_to_cpu(leaf, &key, slot);
11383         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11384
11385         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11386                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11387         } else {
11388                 struct btrfs_tree_block_info *info;
11389
11390                 info = (struct btrfs_tree_block_info *)(ei + 1);
11391                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11392         }
11393
11394         item_size = btrfs_item_size_nr(leaf, slot);
11395         ptr = (unsigned long)iref;
11396         end = (unsigned long)ei + item_size;
11397         while (ptr < end) {
11398                 iref = (struct btrfs_extent_inline_ref *)ptr;
11399                 type = btrfs_extent_inline_ref_type(leaf, iref);
11400                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11401
11402                 /*
11403                  * We only check the tree block if current root is
11404                  * the lowest referencer of it.
11405                  */
11406                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11407                     offset < root->objectid) {
11408                         btrfs_release_path(&path);
11409                         return 0;
11410                 }
11411
11412                 ptr += btrfs_extent_inline_ref_size(type);
11413         }
11414         /*
11415          * Normally we should also check keyed tree block ref, but that may be
11416          * very time consuming.  Inlined ref should already make us skip a lot
11417          * of refs now.  So skip search keyed tree block ref.
11418          */
11419
11420 need_check:
11421         btrfs_release_path(&path);
11422         return 1;
11423 }
11424
11425 /*
11426  * Traversal function for tree block. We will do:
11427  * 1) Skip shared fs/subvolume tree blocks
11428  * 2) Update related bytes accounting
11429  * 3) Pre-order traversal
11430  */
11431 static int traverse_tree_block(struct btrfs_root *root,
11432                                 struct extent_buffer *node)
11433 {
11434         struct extent_buffer *eb;
11435         struct btrfs_key key;
11436         struct btrfs_key drop_key;
11437         int level;
11438         u64 nr;
11439         int i;
11440         int err = 0;
11441         int ret;
11442
11443         /*
11444          * Skip shared fs/subvolume tree block, in that case they will
11445          * be checked by referencer with lowest rootid
11446          */
11447         if (is_fstree(root->objectid) && !should_check(root, node))
11448                 return 0;
11449
11450         /* Update bytes accounting */
11451         total_btree_bytes += node->len;
11452         if (fs_root_objectid(btrfs_header_owner(node)))
11453                 total_fs_tree_bytes += node->len;
11454         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11455                 total_extent_tree_bytes += node->len;
11456         if (!found_old_backref &&
11457             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11458             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11459             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11460                 found_old_backref = 1;
11461
11462         /* pre-order tranversal, check itself first */
11463         level = btrfs_header_level(node);
11464         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11465                                    btrfs_header_level(node),
11466                                    btrfs_header_owner(node));
11467         err |= ret;
11468         if (err)
11469                 error(
11470         "check %s failed root %llu bytenr %llu level %d, force continue check",
11471                         level ? "node":"leaf", root->objectid,
11472                         btrfs_header_bytenr(node), btrfs_header_level(node));
11473
11474         if (!level) {
11475                 btree_space_waste += btrfs_leaf_free_space(root, node);
11476                 ret = check_leaf_items(root, node);
11477                 err |= ret;
11478                 return err;
11479         }
11480
11481         nr = btrfs_header_nritems(node);
11482         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11483         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11484                 sizeof(struct btrfs_key_ptr);
11485
11486         /* Then check all its children */
11487         for (i = 0; i < nr; i++) {
11488                 u64 blocknr = btrfs_node_blockptr(node, i);
11489
11490                 btrfs_node_key_to_cpu(node, &key, i);
11491                 if (level == root->root_item.drop_level &&
11492                     is_dropped_key(&key, &drop_key))
11493                         continue;
11494
11495                 /*
11496                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11497                  * to call the function itself.
11498                  */
11499                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11500                 if (extent_buffer_uptodate(eb)) {
11501                         ret = traverse_tree_block(root, eb);
11502                         err |= ret;
11503                 }
11504                 free_extent_buffer(eb);
11505         }
11506
11507         return err;
11508 }
11509
11510 /*
11511  * Low memory usage version check_chunks_and_extents.
11512  */
11513 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11514 {
11515         struct btrfs_path path;
11516         struct btrfs_key key;
11517         struct btrfs_root *root1;
11518         struct btrfs_root *cur_root;
11519         int err = 0;
11520         int ret;
11521
11522         root1 = root->fs_info->chunk_root;
11523         ret = traverse_tree_block(root1, root1->node);
11524         err |= ret;
11525
11526         root1 = root->fs_info->tree_root;
11527         ret = traverse_tree_block(root1, root1->node);
11528         err |= ret;
11529
11530         btrfs_init_path(&path);
11531         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11532         key.offset = 0;
11533         key.type = BTRFS_ROOT_ITEM_KEY;
11534
11535         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11536         if (ret) {
11537                 error("cannot find extent treet in tree_root");
11538                 goto out;
11539         }
11540
11541         while (1) {
11542                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11543                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11544                         goto next;
11545                 key.offset = (u64)-1;
11546
11547                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11548                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11549                                         &key);
11550                 else
11551                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11552                 if (IS_ERR(cur_root) || !cur_root) {
11553                         error("failed to read tree: %lld", key.objectid);
11554                         goto next;
11555                 }
11556
11557                 ret = traverse_tree_block(cur_root, cur_root->node);
11558                 err |= ret;
11559
11560                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11561                         btrfs_free_fs_root(cur_root);
11562 next:
11563                 ret = btrfs_next_item(root1, &path);
11564                 if (ret)
11565                         goto out;
11566         }
11567
11568 out:
11569         btrfs_release_path(&path);
11570         return err;
11571 }
11572
11573 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11574                            struct btrfs_root *root, int overwrite)
11575 {
11576         struct extent_buffer *c;
11577         struct extent_buffer *old = root->node;
11578         int level;
11579         int ret;
11580         struct btrfs_disk_key disk_key = {0,0,0};
11581
11582         level = 0;
11583
11584         if (overwrite) {
11585                 c = old;
11586                 extent_buffer_get(c);
11587                 goto init;
11588         }
11589         c = btrfs_alloc_free_block(trans, root,
11590                                    root->nodesize,
11591                                    root->root_key.objectid,
11592                                    &disk_key, level, 0, 0);
11593         if (IS_ERR(c)) {
11594                 c = old;
11595                 extent_buffer_get(c);
11596                 overwrite = 1;
11597         }
11598 init:
11599         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11600         btrfs_set_header_level(c, level);
11601         btrfs_set_header_bytenr(c, c->start);
11602         btrfs_set_header_generation(c, trans->transid);
11603         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11604         btrfs_set_header_owner(c, root->root_key.objectid);
11605
11606         write_extent_buffer(c, root->fs_info->fsid,
11607                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11608
11609         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11610                             btrfs_header_chunk_tree_uuid(c),
11611                             BTRFS_UUID_SIZE);
11612
11613         btrfs_mark_buffer_dirty(c);
11614         /*
11615          * this case can happen in the following case:
11616          *
11617          * 1.overwrite previous root.
11618          *
11619          * 2.reinit reloc data root, this is because we skip pin
11620          * down reloc data tree before which means we can allocate
11621          * same block bytenr here.
11622          */
11623         if (old->start == c->start) {
11624                 btrfs_set_root_generation(&root->root_item,
11625                                           trans->transid);
11626                 root->root_item.level = btrfs_header_level(root->node);
11627                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11628                                         &root->root_key, &root->root_item);
11629                 if (ret) {
11630                         free_extent_buffer(c);
11631                         return ret;
11632                 }
11633         }
11634         free_extent_buffer(old);
11635         root->node = c;
11636         add_root_to_dirty_list(root);
11637         return 0;
11638 }
11639
11640 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11641                                 struct extent_buffer *eb, int tree_root)
11642 {
11643         struct extent_buffer *tmp;
11644         struct btrfs_root_item *ri;
11645         struct btrfs_key key;
11646         u64 bytenr;
11647         u32 nodesize;
11648         int level = btrfs_header_level(eb);
11649         int nritems;
11650         int ret;
11651         int i;
11652
11653         /*
11654          * If we have pinned this block before, don't pin it again.
11655          * This can not only avoid forever loop with broken filesystem
11656          * but also give us some speedups.
11657          */
11658         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11659                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11660                 return 0;
11661
11662         btrfs_pin_extent(fs_info, eb->start, eb->len);
11663
11664         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11665         nritems = btrfs_header_nritems(eb);
11666         for (i = 0; i < nritems; i++) {
11667                 if (level == 0) {
11668                         btrfs_item_key_to_cpu(eb, &key, i);
11669                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11670                                 continue;
11671                         /* Skip the extent root and reloc roots */
11672                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11673                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11674                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11675                                 continue;
11676                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11677                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11678
11679                         /*
11680                          * If at any point we start needing the real root we
11681                          * will have to build a stump root for the root we are
11682                          * in, but for now this doesn't actually use the root so
11683                          * just pass in extent_root.
11684                          */
11685                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11686                                               nodesize, 0);
11687                         if (!extent_buffer_uptodate(tmp)) {
11688                                 fprintf(stderr, "Error reading root block\n");
11689                                 return -EIO;
11690                         }
11691                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11692                         free_extent_buffer(tmp);
11693                         if (ret)
11694                                 return ret;
11695                 } else {
11696                         bytenr = btrfs_node_blockptr(eb, i);
11697
11698                         /* If we aren't the tree root don't read the block */
11699                         if (level == 1 && !tree_root) {
11700                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11701                                 continue;
11702                         }
11703
11704                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11705                                               nodesize, 0);
11706                         if (!extent_buffer_uptodate(tmp)) {
11707                                 fprintf(stderr, "Error reading tree block\n");
11708                                 return -EIO;
11709                         }
11710                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11711                         free_extent_buffer(tmp);
11712                         if (ret)
11713                                 return ret;
11714                 }
11715         }
11716
11717         return 0;
11718 }
11719
11720 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11721 {
11722         int ret;
11723
11724         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11725         if (ret)
11726                 return ret;
11727
11728         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11729 }
11730
11731 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11732 {
11733         struct btrfs_block_group_cache *cache;
11734         struct btrfs_path path;
11735         struct extent_buffer *leaf;
11736         struct btrfs_chunk *chunk;
11737         struct btrfs_key key;
11738         int ret;
11739         u64 start;
11740
11741         btrfs_init_path(&path);
11742         key.objectid = 0;
11743         key.type = BTRFS_CHUNK_ITEM_KEY;
11744         key.offset = 0;
11745         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11746         if (ret < 0) {
11747                 btrfs_release_path(&path);
11748                 return ret;
11749         }
11750
11751         /*
11752          * We do this in case the block groups were screwed up and had alloc
11753          * bits that aren't actually set on the chunks.  This happens with
11754          * restored images every time and could happen in real life I guess.
11755          */
11756         fs_info->avail_data_alloc_bits = 0;
11757         fs_info->avail_metadata_alloc_bits = 0;
11758         fs_info->avail_system_alloc_bits = 0;
11759
11760         /* First we need to create the in-memory block groups */
11761         while (1) {
11762                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11763                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11764                         if (ret < 0) {
11765                                 btrfs_release_path(&path);
11766                                 return ret;
11767                         }
11768                         if (ret) {
11769                                 ret = 0;
11770                                 break;
11771                         }
11772                 }
11773                 leaf = path.nodes[0];
11774                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11775                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11776                         path.slots[0]++;
11777                         continue;
11778                 }
11779
11780                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11781                 btrfs_add_block_group(fs_info, 0,
11782                                       btrfs_chunk_type(leaf, chunk),
11783                                       key.objectid, key.offset,
11784                                       btrfs_chunk_length(leaf, chunk));
11785                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11786                                  key.offset + btrfs_chunk_length(leaf, chunk));
11787                 path.slots[0]++;
11788         }
11789         start = 0;
11790         while (1) {
11791                 cache = btrfs_lookup_first_block_group(fs_info, start);
11792                 if (!cache)
11793                         break;
11794                 cache->cached = 1;
11795                 start = cache->key.objectid + cache->key.offset;
11796         }
11797
11798         btrfs_release_path(&path);
11799         return 0;
11800 }
11801
11802 static int reset_balance(struct btrfs_trans_handle *trans,
11803                          struct btrfs_fs_info *fs_info)
11804 {
11805         struct btrfs_root *root = fs_info->tree_root;
11806         struct btrfs_path path;
11807         struct extent_buffer *leaf;
11808         struct btrfs_key key;
11809         int del_slot, del_nr = 0;
11810         int ret;
11811         int found = 0;
11812
11813         btrfs_init_path(&path);
11814         key.objectid = BTRFS_BALANCE_OBJECTID;
11815         key.type = BTRFS_BALANCE_ITEM_KEY;
11816         key.offset = 0;
11817         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11818         if (ret) {
11819                 if (ret > 0)
11820                         ret = 0;
11821                 if (!ret)
11822                         goto reinit_data_reloc;
11823                 else
11824                         goto out;
11825         }
11826
11827         ret = btrfs_del_item(trans, root, &path);
11828         if (ret)
11829                 goto out;
11830         btrfs_release_path(&path);
11831
11832         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11833         key.type = BTRFS_ROOT_ITEM_KEY;
11834         key.offset = 0;
11835         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11836         if (ret < 0)
11837                 goto out;
11838         while (1) {
11839                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11840                         if (!found)
11841                                 break;
11842
11843                         if (del_nr) {
11844                                 ret = btrfs_del_items(trans, root, &path,
11845                                                       del_slot, del_nr);
11846                                 del_nr = 0;
11847                                 if (ret)
11848                                         goto out;
11849                         }
11850                         key.offset++;
11851                         btrfs_release_path(&path);
11852
11853                         found = 0;
11854                         ret = btrfs_search_slot(trans, root, &key, &path,
11855                                                 -1, 1);
11856                         if (ret < 0)
11857                                 goto out;
11858                         continue;
11859                 }
11860                 found = 1;
11861                 leaf = path.nodes[0];
11862                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11863                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11864                         break;
11865                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11866                         path.slots[0]++;
11867                         continue;
11868                 }
11869                 if (!del_nr) {
11870                         del_slot = path.slots[0];
11871                         del_nr = 1;
11872                 } else {
11873                         del_nr++;
11874                 }
11875                 path.slots[0]++;
11876         }
11877
11878         if (del_nr) {
11879                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11880                 if (ret)
11881                         goto out;
11882         }
11883         btrfs_release_path(&path);
11884
11885 reinit_data_reloc:
11886         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11887         key.type = BTRFS_ROOT_ITEM_KEY;
11888         key.offset = (u64)-1;
11889         root = btrfs_read_fs_root(fs_info, &key);
11890         if (IS_ERR(root)) {
11891                 fprintf(stderr, "Error reading data reloc tree\n");
11892                 ret = PTR_ERR(root);
11893                 goto out;
11894         }
11895         record_root_in_trans(trans, root);
11896         ret = btrfs_fsck_reinit_root(trans, root, 0);
11897         if (ret)
11898                 goto out;
11899         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11900 out:
11901         btrfs_release_path(&path);
11902         return ret;
11903 }
11904
11905 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11906                               struct btrfs_fs_info *fs_info)
11907 {
11908         u64 start = 0;
11909         int ret;
11910
11911         /*
11912          * The only reason we don't do this is because right now we're just
11913          * walking the trees we find and pinning down their bytes, we don't look
11914          * at any of the leaves.  In order to do mixed groups we'd have to check
11915          * the leaves of any fs roots and pin down the bytes for any file
11916          * extents we find.  Not hard but why do it if we don't have to?
11917          */
11918         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11919                 fprintf(stderr, "We don't support re-initing the extent tree "
11920                         "for mixed block groups yet, please notify a btrfs "
11921                         "developer you want to do this so they can add this "
11922                         "functionality.\n");
11923                 return -EINVAL;
11924         }
11925
11926         /*
11927          * first we need to walk all of the trees except the extent tree and pin
11928          * down the bytes that are in use so we don't overwrite any existing
11929          * metadata.
11930          */
11931         ret = pin_metadata_blocks(fs_info);
11932         if (ret) {
11933                 fprintf(stderr, "error pinning down used bytes\n");
11934                 return ret;
11935         }
11936
11937         /*
11938          * Need to drop all the block groups since we're going to recreate all
11939          * of them again.
11940          */
11941         btrfs_free_block_groups(fs_info);
11942         ret = reset_block_groups(fs_info);
11943         if (ret) {
11944                 fprintf(stderr, "error resetting the block groups\n");
11945                 return ret;
11946         }
11947
11948         /* Ok we can allocate now, reinit the extent root */
11949         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11950         if (ret) {
11951                 fprintf(stderr, "extent root initialization failed\n");
11952                 /*
11953                  * When the transaction code is updated we should end the
11954                  * transaction, but for now progs only knows about commit so
11955                  * just return an error.
11956                  */
11957                 return ret;
11958         }
11959
11960         /*
11961          * Now we have all the in-memory block groups setup so we can make
11962          * allocations properly, and the metadata we care about is safe since we
11963          * pinned all of it above.
11964          */
11965         while (1) {
11966                 struct btrfs_block_group_cache *cache;
11967
11968                 cache = btrfs_lookup_first_block_group(fs_info, start);
11969                 if (!cache)
11970                         break;
11971                 start = cache->key.objectid + cache->key.offset;
11972                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11973                                         &cache->key, &cache->item,
11974                                         sizeof(cache->item));
11975                 if (ret) {
11976                         fprintf(stderr, "Error adding block group\n");
11977                         return ret;
11978                 }
11979                 btrfs_extent_post_op(trans, fs_info->extent_root);
11980         }
11981
11982         ret = reset_balance(trans, fs_info);
11983         if (ret)
11984                 fprintf(stderr, "error resetting the pending balance\n");
11985
11986         return ret;
11987 }
11988
11989 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11990 {
11991         struct btrfs_path path;
11992         struct btrfs_trans_handle *trans;
11993         struct btrfs_key key;
11994         int ret;
11995
11996         printf("Recowing metadata block %llu\n", eb->start);
11997         key.objectid = btrfs_header_owner(eb);
11998         key.type = BTRFS_ROOT_ITEM_KEY;
11999         key.offset = (u64)-1;
12000
12001         root = btrfs_read_fs_root(root->fs_info, &key);
12002         if (IS_ERR(root)) {
12003                 fprintf(stderr, "Couldn't find owner root %llu\n",
12004                         key.objectid);
12005                 return PTR_ERR(root);
12006         }
12007
12008         trans = btrfs_start_transaction(root, 1);
12009         if (IS_ERR(trans))
12010                 return PTR_ERR(trans);
12011
12012         btrfs_init_path(&path);
12013         path.lowest_level = btrfs_header_level(eb);
12014         if (path.lowest_level)
12015                 btrfs_node_key_to_cpu(eb, &key, 0);
12016         else
12017                 btrfs_item_key_to_cpu(eb, &key, 0);
12018
12019         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12020         btrfs_commit_transaction(trans, root);
12021         btrfs_release_path(&path);
12022         return ret;
12023 }
12024
12025 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12026 {
12027         struct btrfs_path path;
12028         struct btrfs_trans_handle *trans;
12029         struct btrfs_key key;
12030         int ret;
12031
12032         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12033                bad->key.type, bad->key.offset);
12034         key.objectid = bad->root_id;
12035         key.type = BTRFS_ROOT_ITEM_KEY;
12036         key.offset = (u64)-1;
12037
12038         root = btrfs_read_fs_root(root->fs_info, &key);
12039         if (IS_ERR(root)) {
12040                 fprintf(stderr, "Couldn't find owner root %llu\n",
12041                         key.objectid);
12042                 return PTR_ERR(root);
12043         }
12044
12045         trans = btrfs_start_transaction(root, 1);
12046         if (IS_ERR(trans))
12047                 return PTR_ERR(trans);
12048
12049         btrfs_init_path(&path);
12050         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12051         if (ret) {
12052                 if (ret > 0)
12053                         ret = 0;
12054                 goto out;
12055         }
12056         ret = btrfs_del_item(trans, root, &path);
12057 out:
12058         btrfs_commit_transaction(trans, root);
12059         btrfs_release_path(&path);
12060         return ret;
12061 }
12062
12063 static int zero_log_tree(struct btrfs_root *root)
12064 {
12065         struct btrfs_trans_handle *trans;
12066         int ret;
12067
12068         trans = btrfs_start_transaction(root, 1);
12069         if (IS_ERR(trans)) {
12070                 ret = PTR_ERR(trans);
12071                 return ret;
12072         }
12073         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12074         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12075         ret = btrfs_commit_transaction(trans, root);
12076         return ret;
12077 }
12078
12079 static int populate_csum(struct btrfs_trans_handle *trans,
12080                          struct btrfs_root *csum_root, char *buf, u64 start,
12081                          u64 len)
12082 {
12083         u64 offset = 0;
12084         u64 sectorsize;
12085         int ret = 0;
12086
12087         while (offset < len) {
12088                 sectorsize = csum_root->sectorsize;
12089                 ret = read_extent_data(csum_root, buf, start + offset,
12090                                        &sectorsize, 0);
12091                 if (ret)
12092                         break;
12093                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12094                                             start + offset, buf, sectorsize);
12095                 if (ret)
12096                         break;
12097                 offset += sectorsize;
12098         }
12099         return ret;
12100 }
12101
12102 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12103                                       struct btrfs_root *csum_root,
12104                                       struct btrfs_root *cur_root)
12105 {
12106         struct btrfs_path path;
12107         struct btrfs_key key;
12108         struct extent_buffer *node;
12109         struct btrfs_file_extent_item *fi;
12110         char *buf = NULL;
12111         u64 start = 0;
12112         u64 len = 0;
12113         int slot = 0;
12114         int ret = 0;
12115
12116         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12117         if (!buf)
12118                 return -ENOMEM;
12119
12120         btrfs_init_path(&path);
12121         key.objectid = 0;
12122         key.offset = 0;
12123         key.type = 0;
12124         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12125         if (ret < 0)
12126                 goto out;
12127         /* Iterate all regular file extents and fill its csum */
12128         while (1) {
12129                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12130
12131                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12132                         goto next;
12133                 node = path.nodes[0];
12134                 slot = path.slots[0];
12135                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12136                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12137                         goto next;
12138                 start = btrfs_file_extent_disk_bytenr(node, fi);
12139                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12140
12141                 ret = populate_csum(trans, csum_root, buf, start, len);
12142                 if (ret == -EEXIST)
12143                         ret = 0;
12144                 if (ret < 0)
12145                         goto out;
12146 next:
12147                 /*
12148                  * TODO: if next leaf is corrupted, jump to nearest next valid
12149                  * leaf.
12150                  */
12151                 ret = btrfs_next_item(cur_root, &path);
12152                 if (ret < 0)
12153                         goto out;
12154                 if (ret > 0) {
12155                         ret = 0;
12156                         goto out;
12157                 }
12158         }
12159
12160 out:
12161         btrfs_release_path(&path);
12162         free(buf);
12163         return ret;
12164 }
12165
12166 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12167                                   struct btrfs_root *csum_root)
12168 {
12169         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12170         struct btrfs_path path;
12171         struct btrfs_root *tree_root = fs_info->tree_root;
12172         struct btrfs_root *cur_root;
12173         struct extent_buffer *node;
12174         struct btrfs_key key;
12175         int slot = 0;
12176         int ret = 0;
12177
12178         btrfs_init_path(&path);
12179         key.objectid = BTRFS_FS_TREE_OBJECTID;
12180         key.offset = 0;
12181         key.type = BTRFS_ROOT_ITEM_KEY;
12182         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12183         if (ret < 0)
12184                 goto out;
12185         if (ret > 0) {
12186                 ret = -ENOENT;
12187                 goto out;
12188         }
12189
12190         while (1) {
12191                 node = path.nodes[0];
12192                 slot = path.slots[0];
12193                 btrfs_item_key_to_cpu(node, &key, slot);
12194                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12195                         goto out;
12196                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12197                         goto next;
12198                 if (!is_fstree(key.objectid))
12199                         goto next;
12200                 key.offset = (u64)-1;
12201
12202                 cur_root = btrfs_read_fs_root(fs_info, &key);
12203                 if (IS_ERR(cur_root) || !cur_root) {
12204                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12205                                 key.objectid);
12206                         goto out;
12207                 }
12208                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12209                                 cur_root);
12210                 if (ret < 0)
12211                         goto out;
12212 next:
12213                 ret = btrfs_next_item(tree_root, &path);
12214                 if (ret > 0) {
12215                         ret = 0;
12216                         goto out;
12217                 }
12218                 if (ret < 0)
12219                         goto out;
12220         }
12221
12222 out:
12223         btrfs_release_path(&path);
12224         return ret;
12225 }
12226
12227 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12228                                       struct btrfs_root *csum_root)
12229 {
12230         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12231         struct btrfs_path path;
12232         struct btrfs_extent_item *ei;
12233         struct extent_buffer *leaf;
12234         char *buf;
12235         struct btrfs_key key;
12236         int ret;
12237
12238         btrfs_init_path(&path);
12239         key.objectid = 0;
12240         key.type = BTRFS_EXTENT_ITEM_KEY;
12241         key.offset = 0;
12242         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12243         if (ret < 0) {
12244                 btrfs_release_path(&path);
12245                 return ret;
12246         }
12247
12248         buf = malloc(csum_root->sectorsize);
12249         if (!buf) {
12250                 btrfs_release_path(&path);
12251                 return -ENOMEM;
12252         }
12253
12254         while (1) {
12255                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12256                         ret = btrfs_next_leaf(extent_root, &path);
12257                         if (ret < 0)
12258                                 break;
12259                         if (ret) {
12260                                 ret = 0;
12261                                 break;
12262                         }
12263                 }
12264                 leaf = path.nodes[0];
12265
12266                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12267                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12268                         path.slots[0]++;
12269                         continue;
12270                 }
12271
12272                 ei = btrfs_item_ptr(leaf, path.slots[0],
12273                                     struct btrfs_extent_item);
12274                 if (!(btrfs_extent_flags(leaf, ei) &
12275                       BTRFS_EXTENT_FLAG_DATA)) {
12276                         path.slots[0]++;
12277                         continue;
12278                 }
12279
12280                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12281                                     key.offset);
12282                 if (ret)
12283                         break;
12284                 path.slots[0]++;
12285         }
12286
12287         btrfs_release_path(&path);
12288         free(buf);
12289         return ret;
12290 }
12291
/*
 * Recalculate the checksums and insert them into the csum tree.
 *
 * Initializing the extent tree wipes out all extent info, so in that case the
 * extent tree cannot be used as the source and the fs/subvolume trees are
 * walked instead.  A non-zero @search_fs_tree selects the fs/subvolume tree
 * walk; zero selects the extent tree walk.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
12308
12309 static void free_roots_info_cache(void)
12310 {
12311         if (!roots_info_cache)
12312                 return;
12313
12314         while (!cache_tree_empty(roots_info_cache)) {
12315                 struct cache_extent *entry;
12316                 struct root_item_info *rii;
12317
12318                 entry = first_cache_extent(roots_info_cache);
12319                 if (!entry)
12320                         break;
12321                 remove_cache_extent(roots_info_cache, entry);
12322                 rii = container_of(entry, struct root_item_info, cache_extent);
12323                 free(rii);
12324         }
12325
12326         free(roots_info_cache);
12327         roots_info_cache = NULL;
12328 }
12329
12330 static int build_roots_info_cache(struct btrfs_fs_info *info)
12331 {
12332         int ret = 0;
12333         struct btrfs_key key;
12334         struct extent_buffer *leaf;
12335         struct btrfs_path path;
12336
12337         if (!roots_info_cache) {
12338                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12339                 if (!roots_info_cache)
12340                         return -ENOMEM;
12341                 cache_tree_init(roots_info_cache);
12342         }
12343
12344         btrfs_init_path(&path);
12345         key.objectid = 0;
12346         key.type = BTRFS_EXTENT_ITEM_KEY;
12347         key.offset = 0;
12348         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12349         if (ret < 0)
12350                 goto out;
12351         leaf = path.nodes[0];
12352
12353         while (1) {
12354                 struct btrfs_key found_key;
12355                 struct btrfs_extent_item *ei;
12356                 struct btrfs_extent_inline_ref *iref;
12357                 int slot = path.slots[0];
12358                 int type;
12359                 u64 flags;
12360                 u64 root_id;
12361                 u8 level;
12362                 struct cache_extent *entry;
12363                 struct root_item_info *rii;
12364
12365                 if (slot >= btrfs_header_nritems(leaf)) {
12366                         ret = btrfs_next_leaf(info->extent_root, &path);
12367                         if (ret < 0) {
12368                                 break;
12369                         } else if (ret) {
12370                                 ret = 0;
12371                                 break;
12372                         }
12373                         leaf = path.nodes[0];
12374                         slot = path.slots[0];
12375                 }
12376
12377                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12378
12379                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12380                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12381                         goto next;
12382
12383                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12384                 flags = btrfs_extent_flags(leaf, ei);
12385
12386                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12387                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12388                         goto next;
12389
12390                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12391                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12392                         level = found_key.offset;
12393                 } else {
12394                         struct btrfs_tree_block_info *binfo;
12395
12396                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12397                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12398                         level = btrfs_tree_block_level(leaf, binfo);
12399                 }
12400
12401                 /*
12402                  * For a root extent, it must be of the following type and the
12403                  * first (and only one) iref in the item.
12404                  */
12405                 type = btrfs_extent_inline_ref_type(leaf, iref);
12406                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12407                         goto next;
12408
12409                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12410                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12411                 if (!entry) {
12412                         rii = malloc(sizeof(struct root_item_info));
12413                         if (!rii) {
12414                                 ret = -ENOMEM;
12415                                 goto out;
12416                         }
12417                         rii->cache_extent.start = root_id;
12418                         rii->cache_extent.size = 1;
12419                         rii->level = (u8)-1;
12420                         entry = &rii->cache_extent;
12421                         ret = insert_cache_extent(roots_info_cache, entry);
12422                         ASSERT(ret == 0);
12423                 } else {
12424                         rii = container_of(entry, struct root_item_info,
12425                                            cache_extent);
12426                 }
12427
12428                 ASSERT(rii->cache_extent.start == root_id);
12429                 ASSERT(rii->cache_extent.size == 1);
12430
12431                 if (level > rii->level || rii->level == (u8)-1) {
12432                         rii->level = level;
12433                         rii->bytenr = found_key.objectid;
12434                         rii->gen = btrfs_extent_generation(leaf, ei);
12435                         rii->node_count = 1;
12436                 } else if (level == rii->level) {
12437                         rii->node_count++;
12438                 }
12439 next:
12440                 path.slots[0]++;
12441         }
12442
12443 out:
12444         btrfs_release_path(&path);
12445
12446         return ret;
12447 }
12448
12449 static int maybe_repair_root_item(struct btrfs_path *path,
12450                                   const struct btrfs_key *root_key,
12451                                   const int read_only_mode)
12452 {
12453         const u64 root_id = root_key->objectid;
12454         struct cache_extent *entry;
12455         struct root_item_info *rii;
12456         struct btrfs_root_item ri;
12457         unsigned long offset;
12458
12459         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12460         if (!entry) {
12461                 fprintf(stderr,
12462                         "Error: could not find extent items for root %llu\n",
12463                         root_key->objectid);
12464                 return -ENOENT;
12465         }
12466
12467         rii = container_of(entry, struct root_item_info, cache_extent);
12468         ASSERT(rii->cache_extent.start == root_id);
12469         ASSERT(rii->cache_extent.size == 1);
12470
12471         if (rii->node_count != 1) {
12472                 fprintf(stderr,
12473                         "Error: could not find btree root extent for root %llu\n",
12474                         root_id);
12475                 return -ENOENT;
12476         }
12477
12478         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12479         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12480
12481         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12482             btrfs_root_level(&ri) != rii->level ||
12483             btrfs_root_generation(&ri) != rii->gen) {
12484
12485                 /*
12486                  * If we're in repair mode but our caller told us to not update
12487                  * the root item, i.e. just check if it needs to be updated, don't
12488                  * print this message, since the caller will call us again shortly
12489                  * for the same root item without read only mode (the caller will
12490                  * open a transaction first).
12491                  */
12492                 if (!(read_only_mode && repair))
12493                         fprintf(stderr,
12494                                 "%sroot item for root %llu,"
12495                                 " current bytenr %llu, current gen %llu, current level %u,"
12496                                 " new bytenr %llu, new gen %llu, new level %u\n",
12497                                 (read_only_mode ? "" : "fixing "),
12498                                 root_id,
12499                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12500                                 btrfs_root_level(&ri),
12501                                 rii->bytenr, rii->gen, rii->level);
12502
12503                 if (btrfs_root_generation(&ri) > rii->gen) {
12504                         fprintf(stderr,
12505                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12506                                 root_id, btrfs_root_generation(&ri), rii->gen);
12507                         return -EINVAL;
12508                 }
12509
12510                 if (!read_only_mode) {
12511                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12512                         btrfs_set_root_level(&ri, rii->level);
12513                         btrfs_set_root_generation(&ri, rii->gen);
12514                         write_extent_buffer(path->nodes[0], &ri,
12515                                             offset, sizeof(ri));
12516                 }
12517
12518                 return 1;
12519         }
12520
12521         return 0;
12522 }
12523
/*
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detect and fix those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - not doing so makes other
 * repair code delete or modify backrefs in the extent tree for example, which
 * will result in an inconsistent fs after repairing the root items.
 *
 * Returns a negative errno on failure, otherwise the number of root items
 * that were found to be bad (and, in repair mode, were rewritten).
 */
static int repair_root_items(struct btrfs_fs_info *info)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans = NULL;
	int ret = 0;
	int bad_roots = 0;
	int need_trans = 0;

	btrfs_init_path(&path);

	/* Collect the root node information the root items are checked against */
	ret = build_roots_info_cache(info);
	if (ret)
		goto out;

	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

again:
	/*
	 * Avoid opening and committing transactions if a leaf doesn't have
	 * any root items that need to be fixed, so that we avoid rotating
	 * backup roots unnecessarily.
	 */
	if (need_trans) {
		trans = btrfs_start_transaction(info->tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
	}

	/* Search with cow only when a transaction is open */
	ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
				0, trans ? 1 : 0);
	if (ret < 0)
		goto out;
	leaf = path.nodes[0];

	while (1) {
		struct btrfs_key found_key;

		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			/*
			 * End of this leaf.  NOTE(review): find_next_key()
			 * presumably stores the key to resume from in `key`
			 * and returns non-zero when there is none - confirm
			 * against its definition.
			 */
			int no_more_keys = find_next_key(&path, &key);

			btrfs_release_path(&path);
			if (trans) {
				ret = btrfs_commit_transaction(trans,
							       info->tree_root);
				trans = NULL;
				if (ret < 0)
					goto out;
			}
			/* The next leaf starts without a transaction again */
			need_trans = 0;
			if (no_more_keys)
				break;
			goto again;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);

		if (found_key.type != BTRFS_ROOT_ITEM_KEY)
			goto next;
		if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			goto next;

		/* Only check (read only mode) while no transaction is open */
		ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
		if (ret < 0)
			goto out;
		if (ret) {
			if (!trans && repair) {
				/*
				 * A bad root item was detected without a
				 * transaction: restart the search at this key
				 * with a transaction open so that the next
				 * pass can rewrite it.
				 */
				need_trans = 1;
				key = found_key;
				btrfs_release_path(&path);
				goto again;
			}
			bad_roots++;
		}
next:
		path.slots[0]++;
	}
	ret = 0;
out:
	free_roots_info_cache();
	btrfs_release_path(&path);
	/* A transaction still open here is committed; its result is not checked */
	if (trans)
		btrfs_commit_transaction(trans, info->tree_root);
	if (ret < 0)
		return ret;

	return bad_roots;
}
12630
12631 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12632 {
12633         struct btrfs_trans_handle *trans;
12634         struct btrfs_block_group_cache *bg_cache;
12635         u64 current = 0;
12636         int ret = 0;
12637
12638         /* Clear all free space cache inodes and its extent data */
12639         while (1) {
12640                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12641                 if (!bg_cache)
12642                         break;
12643                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12644                 if (ret < 0)
12645                         return ret;
12646                 current = bg_cache->key.objectid + bg_cache->key.offset;
12647         }
12648
12649         /* Don't forget to set cache_generation to -1 */
12650         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12651         if (IS_ERR(trans)) {
12652                 error("failed to update super block cache generation");
12653                 return PTR_ERR(trans);
12654         }
12655         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12656         btrfs_commit_transaction(trans, fs_info->tree_root);
12657
12658         return ret;
12659 }
12660
/*
 * Help text for "btrfs check": a NULL-terminated array of strings.  The
 * strings before the empty one hold the synopsis and description; the ones
 * after it list the options, one per line with continuation lines indented
 * to the description column.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL	/* end of list marker */
};
12691
12692 int cmd_check(int argc, char **argv)
12693 {
12694         struct cache_tree root_cache;
12695         struct btrfs_root *root;
12696         struct btrfs_fs_info *info;
12697         u64 bytenr = 0;
12698         u64 subvolid = 0;
12699         u64 tree_root_bytenr = 0;
12700         u64 chunk_root_bytenr = 0;
12701         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12702         int ret;
12703         int err = 0;
12704         u64 num;
12705         int init_csum_tree = 0;
12706         int readonly = 0;
12707         int clear_space_cache = 0;
12708         int qgroup_report = 0;
12709         int qgroups_repaired = 0;
12710         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12711
12712         while(1) {
12713                 int c;
12714                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12715                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12716                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12717                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12718                 static const struct option long_options[] = {
12719                         { "super", required_argument, NULL, 's' },
12720                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12721                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12722                         { "init-csum-tree", no_argument, NULL,
12723                                 GETOPT_VAL_INIT_CSUM },
12724                         { "init-extent-tree", no_argument, NULL,
12725                                 GETOPT_VAL_INIT_EXTENT },
12726                         { "check-data-csum", no_argument, NULL,
12727                                 GETOPT_VAL_CHECK_CSUM },
12728                         { "backup", no_argument, NULL, 'b' },
12729                         { "subvol-extents", required_argument, NULL, 'E' },
12730                         { "qgroup-report", no_argument, NULL, 'Q' },
12731                         { "tree-root", required_argument, NULL, 'r' },
12732                         { "chunk-root", required_argument, NULL,
12733                                 GETOPT_VAL_CHUNK_TREE },
12734                         { "progress", no_argument, NULL, 'p' },
12735                         { "mode", required_argument, NULL,
12736                                 GETOPT_VAL_MODE },
12737                         { "clear-space-cache", required_argument, NULL,
12738                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12739                         { NULL, 0, NULL, 0}
12740                 };
12741
12742                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12743                 if (c < 0)
12744                         break;
12745                 switch(c) {
12746                         case 'a': /* ignored */ break;
12747                         case 'b':
12748                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12749                                 break;
12750                         case 's':
12751                                 num = arg_strtou64(optarg);
12752                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12753                                         error(
12754                                         "super mirror should be less than %d",
12755                                                 BTRFS_SUPER_MIRROR_MAX);
12756                                         exit(1);
12757                                 }
12758                                 bytenr = btrfs_sb_offset(((int)num));
12759                                 printf("using SB copy %llu, bytenr %llu\n", num,
12760                                        (unsigned long long)bytenr);
12761                                 break;
12762                         case 'Q':
12763                                 qgroup_report = 1;
12764                                 break;
12765                         case 'E':
12766                                 subvolid = arg_strtou64(optarg);
12767                                 break;
12768                         case 'r':
12769                                 tree_root_bytenr = arg_strtou64(optarg);
12770                                 break;
12771                         case GETOPT_VAL_CHUNK_TREE:
12772                                 chunk_root_bytenr = arg_strtou64(optarg);
12773                                 break;
12774                         case 'p':
12775                                 ctx.progress_enabled = true;
12776                                 break;
12777                         case '?':
12778                         case 'h':
12779                                 usage(cmd_check_usage);
12780                         case GETOPT_VAL_REPAIR:
12781                                 printf("enabling repair mode\n");
12782                                 repair = 1;
12783                                 ctree_flags |= OPEN_CTREE_WRITES;
12784                                 break;
12785                         case GETOPT_VAL_READONLY:
12786                                 readonly = 1;
12787                                 break;
12788                         case GETOPT_VAL_INIT_CSUM:
12789                                 printf("Creating a new CRC tree\n");
12790                                 init_csum_tree = 1;
12791                                 repair = 1;
12792                                 ctree_flags |= OPEN_CTREE_WRITES;
12793                                 break;
12794                         case GETOPT_VAL_INIT_EXTENT:
12795                                 init_extent_tree = 1;
12796                                 ctree_flags |= (OPEN_CTREE_WRITES |
12797                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12798                                 repair = 1;
12799                                 break;
12800                         case GETOPT_VAL_CHECK_CSUM:
12801                                 check_data_csum = 1;
12802                                 break;
12803                         case GETOPT_VAL_MODE:
12804                                 check_mode = parse_check_mode(optarg);
12805                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12806                                         error("unknown mode: %s", optarg);
12807                                         exit(1);
12808                                 }
12809                                 break;
12810                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12811                                 if (strcmp(optarg, "v1") == 0) {
12812                                         clear_space_cache = 1;
12813                                 } else if (strcmp(optarg, "v2") == 0) {
12814                                         clear_space_cache = 2;
12815                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12816                                 } else {
12817                                         error(
12818                 "invalid argument to --clear-space-cache, must be v1 or v2");
12819                                         exit(1);
12820                                 }
12821                                 ctree_flags |= OPEN_CTREE_WRITES;
12822                                 break;
12823                 }
12824         }
12825
12826         if (check_argc_exact(argc - optind, 1))
12827                 usage(cmd_check_usage);
12828
12829         if (ctx.progress_enabled) {
12830                 ctx.tp = TASK_NOTHING;
12831                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12832         }
12833
12834         /* This check is the only reason for --readonly to exist */
12835         if (readonly && repair) {
12836                 error("repair options are not compatible with --readonly");
12837                 exit(1);
12838         }
12839
12840         /*
12841          * Not supported yet
12842          */
12843         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12844                 error("low memory mode doesn't support repair yet");
12845                 exit(1);
12846         }
12847
12848         radix_tree_init();
12849         cache_tree_init(&root_cache);
12850
12851         if((ret = check_mounted(argv[optind])) < 0) {
12852                 error("could not check mount status: %s", strerror(-ret));
12853                 err |= !!ret;
12854                 goto err_out;
12855         } else if(ret) {
12856                 error("%s is currently mounted, aborting", argv[optind]);
12857                 ret = -EBUSY;
12858                 err |= !!ret;
12859                 goto err_out;
12860         }
12861
12862         /* only allow partial opening under repair mode */
12863         if (repair)
12864                 ctree_flags |= OPEN_CTREE_PARTIAL;
12865
12866         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12867                                   chunk_root_bytenr, ctree_flags);
12868         if (!info) {
12869                 error("cannot open file system");
12870                 ret = -EIO;
12871                 err |= !!ret;
12872                 goto err_out;
12873         }
12874
12875         global_info = info;
12876         root = info->fs_root;
12877         if (clear_space_cache == 1) {
12878                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12879                         error(
12880                 "free space cache v2 detected, use --clear-space-cache v2");
12881                         ret = 1;
12882                         goto close_out;
12883                 }
12884                 printf("Clearing free space cache\n");
12885                 ret = clear_free_space_cache(info);
12886                 if (ret) {
12887                         error("failed to clear free space cache");
12888                         ret = 1;
12889                 } else {
12890                         printf("Free space cache cleared\n");
12891                 }
12892                 goto close_out;
12893         } else if (clear_space_cache == 2) {
12894                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12895                         printf("no free space cache v2 to clear\n");
12896                         ret = 0;
12897                         goto close_out;
12898                 }
12899                 printf("Clear free space cache v2\n");
12900                 ret = btrfs_clear_free_space_tree(info);
12901                 if (ret) {
12902                         error("failed to clear free space cache v2: %d", ret);
12903                         ret = 1;
12904                 } else {
12905                         printf("free space cache v2 cleared\n");
12906                 }
12907                 goto close_out;
12908         }
12909
12910         /*
12911          * Repair mode forces a transaction commit, which would make the
12912          * log tree fail to load at the next mount.
12913          */
12914         if (repair && btrfs_super_log_root(info->super_copy)) {
12915                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12916                 if (!ret) {
12917                         ret = 1;
12918                         err |= !!ret;
12919                         goto close_out;
12920                 }
12921                 ret = zero_log_tree(root);
12922                 err |= !!ret;
12923                 if (ret) {
12924                         error("failed to zero log tree: %d", ret);
12925                         goto close_out;
12926                 }
12927         }
12928
12929         uuid_unparse(info->super_copy->fsid, uuidbuf);
12930         if (qgroup_report) {
12931                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12932                        uuidbuf);
12933                 ret = qgroup_verify_all(info);
12934                 err |= !!ret;
12935                 if (ret == 0)
12936                         report_qgroups(1);
12937                 goto close_out;
12938         }
12939         if (subvolid) {
12940                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12941                        subvolid, argv[optind], uuidbuf);
12942                 ret = print_extent_state(info, subvolid);
12943                 err |= !!ret;
12944                 goto close_out;
12945         }
12946         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12947
12948         if (!extent_buffer_uptodate(info->tree_root->node) ||
12949             !extent_buffer_uptodate(info->dev_root->node) ||
12950             !extent_buffer_uptodate(info->chunk_root->node)) {
12951                 error("critical roots corrupted, unable to check the filesystem");
12952                 err |= !!ret;
12953                 ret = -EIO;
12954                 goto close_out;
12955         }
12956
12957         if (init_extent_tree || init_csum_tree) {
12958                 struct btrfs_trans_handle *trans;
12959
12960                 trans = btrfs_start_transaction(info->extent_root, 0);
12961                 if (IS_ERR(trans)) {
12962                         error("error starting transaction");
12963                         ret = PTR_ERR(trans);
12964                         err |= !!ret;
12965                         goto close_out;
12966                 }
12967
12968                 if (init_extent_tree) {
12969                         printf("Creating a new extent tree\n");
12970                         ret = reinit_extent_tree(trans, info);
12971                         err |= !!ret;
12972                         if (ret)
12973                                 goto close_out;
12974                 }
12975
12976                 if (init_csum_tree) {
12977                         printf("Reinitialize checksum tree\n");
12978                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12979                         if (ret) {
12980                                 error("checksum tree initialization failed: %d",
12981                                                 ret);
12982                                 ret = -EIO;
12983                                 err |= !!ret;
12984                                 goto close_out;
12985                         }
12986
12987                         ret = fill_csum_tree(trans, info->csum_root,
12988                                              init_extent_tree);
12989                         err |= !!ret;
12990                         if (ret) {
12991                                 error("checksum tree refilling failed: %d", ret);
12992                                 return -EIO;
12993                         }
12994                 }
12995                 /*
12996                  * Ok now we commit and run the normal fsck, which will add
12997                  * extent entries for all of the items it finds.
12998                  */
12999                 ret = btrfs_commit_transaction(trans, info->extent_root);
13000                 err |= !!ret;
13001                 if (ret)
13002                         goto close_out;
13003         }
13004         if (!extent_buffer_uptodate(info->extent_root->node)) {
13005                 error("critical: extent_root, unable to check the filesystem");
13006                 ret = -EIO;
13007                 err |= !!ret;
13008                 goto close_out;
13009         }
13010         if (!extent_buffer_uptodate(info->csum_root->node)) {
13011                 error("critical: csum_root, unable to check the filesystem");
13012                 ret = -EIO;
13013                 err |= !!ret;
13014                 goto close_out;
13015         }
13016
13017         if (!ctx.progress_enabled)
13018                 fprintf(stderr, "checking extents\n");
13019         if (check_mode == CHECK_MODE_LOWMEM)
13020                 ret = check_chunks_and_extents_v2(root);
13021         else
13022                 ret = check_chunks_and_extents(root);
13023         err |= !!ret;
13024         if (ret)
13025                 error(
13026                 "errors found in extent allocation tree or chunk allocation");
13027
13028         ret = repair_root_items(info);
13029         err |= !!ret;
13030         if (ret < 0) {
13031                 error("failed to repair root items: %s", strerror(-ret));
13032                 goto close_out;
13033         }
13034         if (repair) {
13035                 fprintf(stderr, "Fixed %d roots.\n", ret);
13036                 ret = 0;
13037         } else if (ret > 0) {
13038                 fprintf(stderr,
13039                        "Found %d roots with an outdated root item.\n",
13040                        ret);
13041                 fprintf(stderr,
13042                         "Please run a filesystem check with the option --repair to fix them.\n");
13043                 ret = 1;
13044                 err |= !!ret;
13045                 goto close_out;
13046         }
13047
13048         if (!ctx.progress_enabled) {
13049                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13050                         fprintf(stderr, "checking free space tree\n");
13051                 else
13052                         fprintf(stderr, "checking free space cache\n");
13053         }
13054         ret = check_space_cache(root);
13055         err |= !!ret;
13056         if (ret) {
13057                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13058                         error("errors found in free space tree");
13059                 else
13060                         error("errors found in free space cache");
13061                 goto out;
13062         }
13063
13064         /*
13065          * The on-disk format used to require hole extents in between real
13066          * extents.  Unless the NO_HOLES flag is set we must make sure there
13067          * are no gaps in the file extents of an inode; with the flag set,
13068          * such gaps can simply be ignored.
13069          */
13070         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13071         if (!ctx.progress_enabled)
13072                 fprintf(stderr, "checking fs roots\n");
13073         if (check_mode == CHECK_MODE_LOWMEM)
13074                 ret = check_fs_roots_v2(root->fs_info);
13075         else
13076                 ret = check_fs_roots(root, &root_cache);
13077         err |= !!ret;
13078         if (ret) {
13079                 error("errors found in fs roots");
13080                 goto out;
13081         }
13082
13083         fprintf(stderr, "checking csums\n");
13084         ret = check_csums(root);
13085         err |= !!ret;
13086         if (ret) {
13087                 error("errors found in csum tree");
13088                 goto out;
13089         }
13090
13091         fprintf(stderr, "checking root refs\n");
13092         /* For low memory mode, check_fs_roots_v2 handles root refs */
13093         if (check_mode != CHECK_MODE_LOWMEM) {
13094                 ret = check_root_refs(root, &root_cache);
13095                 err |= !!ret;
13096                 if (ret) {
13097                         error("errors found in root refs");
13098                         goto out;
13099                 }
13100         }
13101
13102         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13103                 struct extent_buffer *eb;
13104
13105                 eb = list_first_entry(&root->fs_info->recow_ebs,
13106                                       struct extent_buffer, recow);
13107                 list_del_init(&eb->recow);
13108                 ret = recow_extent_buffer(root, eb);
13109                 err |= !!ret;
13110                 if (ret) {
13111                         error("fails to fix transid errors");
13112                         break;
13113                 }
13114         }
13115
13116         while (!list_empty(&delete_items)) {
13117                 struct bad_item *bad;
13118
13119                 bad = list_first_entry(&delete_items, struct bad_item, list);
13120                 list_del_init(&bad->list);
13121                 if (repair) {
13122                         ret = delete_bad_item(root, bad);
13123                         err |= !!ret;
13124                 }
13125                 free(bad);
13126         }
13127
13128         if (info->quota_enabled) {
13129                 fprintf(stderr, "checking quota groups\n");
13130                 ret = qgroup_verify_all(info);
13131                 err |= !!ret;
13132                 if (ret) {
13133                         error("failed to check quota groups");
13134                         goto out;
13135                 }
13136                 report_qgroups(0);
13137                 ret = repair_qgroups(info, &qgroups_repaired);
13138                 err |= !!ret;
13139                 if (err) {
13140                         error("failed to repair quota groups");
13141                         goto out;
13142                 }
13143                 ret = 0;
13144         }
13145
13146         if (!list_empty(&root->fs_info->recow_ebs)) {
13147                 error("transid errors in file system");
13148                 ret = 1;
13149                 err |= !!ret;
13150         }
13151 out:
13152         if (found_old_backref) { /*
13153                  * there was a disk format change when mixed
13154                  * backref was in testing tree. The old format
13155                  * existed about one week.
13156                  */
13157                 printf("\n * Found old mixed backref format. "
13158                        "The old format is not supported! *"
13159                        "\n * Please mount the FS in readonly mode, "
13160                        "backup data and re-format the FS. *\n\n");
13161                 err |= 1;
13162         }
13163         printf("found %llu bytes used, ",
13164                (unsigned long long)bytes_used);
13165         if (err)
13166                 printf("error(s) found\n");
13167         else
13168                 printf("no error found\n");
13169         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13170         printf("total tree bytes: %llu\n",
13171                (unsigned long long)total_btree_bytes);
13172         printf("total fs tree bytes: %llu\n",
13173                (unsigned long long)total_fs_tree_bytes);
13174         printf("total extent tree bytes: %llu\n",
13175                (unsigned long long)total_extent_tree_bytes);
13176         printf("btree space waste bytes: %llu\n",
13177                (unsigned long long)btree_space_waste);
13178         printf("file data blocks allocated: %llu\n referenced %llu\n",
13179                 (unsigned long long)data_bytes_allocated,
13180                 (unsigned long long)data_bytes_referenced);
13181
13182         free_qgroup_counts();
13183         free_root_recs_tree(&root_cache);
13184 close_out:
13185         close_ctree(root);
13186 err_out:
13187         if (ctx.progress_enabled)
13188                 task_deinit(ctx.info);
13189
13190         return err;
13191 }