btrfs-progs: drop blocksize from read_tree_block
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase reported by the progress indicator.  The values are used as an
 * index into task_position_string[] in print_status_check().
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /* Sentinel, has to stay the last enumerator */
        TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Check implementation selected by the user, see parse_check_mode(). */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        /* Returned for an unrecognized mode string */
        CHECK_MODE_UNKNOWN,
        /* Alias: mode used when nothing was requested */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/*
 * Error bits for inode and root checks.  Each value is a distinct single
 * bit so that several of them can be OR-ed together; bit 0 is not used.
 */
#define ROOT_DIR_ERROR          (1 << 1)        /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1 << 2)        /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1 << 3)        /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1 << 4)        /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1 << 5)        /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1 << 6)        /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1 << 7)        /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1 << 8)        /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1 << 9)        /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1 << 10)       /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1 << 11)       /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1 << 12)       /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1 << 13)       /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1 << 14)       /* no inode_item */
#define LAST_ITEM               (1 << 15)       /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1 << 16)       /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1 << 17)       /* ROOT_REF found but not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Value used to explicitly initialize the two-bit
 * extent_record::flag_block_full_backref field.
 */
enum {
        FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         int level_size;
230         struct btrfs_key drop_key;
231 };
232
/*
 * Error bits for a single reference; decoded into text by
 * print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8)        /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
246
247 struct file_extent_hole {
248         struct rb_node node;
249         u64 start;
250         u64 len;
251 };
252
253 struct inode_record {
254         struct list_head backrefs;
255         unsigned int checked:1;
256         unsigned int merging:1;
257         unsigned int found_inode_item:1;
258         unsigned int found_dir_item:1;
259         unsigned int found_file_extent:1;
260         unsigned int found_csum_item:1;
261         unsigned int some_csum_missing:1;
262         unsigned int nodatasum:1;
263         int errors;
264
265         u64 ino;
266         u32 nlink;
267         u32 imode;
268         u64 isize;
269         u64 nbytes;
270
271         u32 found_link;
272         u64 found_size;
273         u64 extent_start;
274         u64 extent_end;
275         struct rb_root holes;
276         struct list_head orphan_extents;
277
278         u32 refs;
279 };
280
/*
 * Error bits stored in inode_record::errors; decoded into text by
 * print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8)        /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10)       /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
296
297 struct root_backref {
298         struct list_head list;
299         unsigned int found_dir_item:1;
300         unsigned int found_dir_index:1;
301         unsigned int found_back_ref:1;
302         unsigned int found_forward_ref:1;
303         unsigned int reachable:1;
304         int errors;
305         u64 ref_root;
306         u64 dir;
307         u64 index;
308         u16 namelen;
309         char name[0];
310 };
311
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
313 {
314         return list_entry(entry, struct root_backref, list);
315 }
316
317 struct root_record {
318         struct list_head backrefs;
319         struct cache_extent cache;
320         unsigned int found_root_item:1;
321         u64 objectid;
322         u32 found_ref;
323 };
324
325 struct ptr_node {
326         struct cache_extent cache;
327         void *data;
328 };
329
330 struct shared_node {
331         struct cache_extent cache;
332         struct cache_tree root_cache;
333         struct cache_tree inode_cache;
334         struct inode_record *current;
335         u32 refs;
336 };
337
338 struct block_info {
339         u64 start;
340         u32 size;
341 };
342
343 struct walk_control {
344         struct cache_tree shared;
345         struct shared_node *nodes[BTRFS_MAX_LEVEL];
346         int active_node;
347         int root_level;
348 };
349
350 struct bad_item {
351         struct btrfs_key key;
352         u64 root_id;
353         struct list_head list;
354 };
355
356 struct extent_entry {
357         u64 bytenr;
358         u64 bytes;
359         int count;
360         int broken;
361         struct list_head list;
362 };
363
364 struct root_item_info {
365         /* level of the root */
366         u8 level;
367         /* number of nodes at this level, must be 1 for a root */
368         int node_count;
369         u64 bytenr;
370         u64 gen;
371         struct cache_extent cache_extent;
372 };
373
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits; they are only used
 * internally for error classification.  Every value is a distinct single
 * bit so that several errors can be OR-ed together without losing
 * information.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
/*
 * Used to share bit 4 with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable; it now has a bit of its own.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
390
391 static void *print_status_check(void *p)
392 {
393         struct task_ctx *priv = p;
394         const char work_indicator[] = { '.', 'o', 'O', 'o' };
395         uint32_t count = 0;
396         static char *task_position_string[] = {
397                 "checking extents",
398                 "checking free space cache",
399                 "checking fs roots",
400         };
401
402         task_period_start(priv->info, 1000 /* 1s */);
403
404         if (priv->tp == TASK_NOTHING)
405                 return NULL;
406
407         while (1) {
408                 printf("%s [%c]\r", task_position_string[priv->tp],
409                                 work_indicator[count % 4]);
410                 count++;
411                 fflush(stdout);
412                 task_period_wait(priv->info);
413         }
414         return NULL;
415 }
416
/*
 * Progress task epilogue: terminate the status line on stdout.
 *
 * @p:  unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        putchar('\n');
        fflush(stdout);

        return 0;
}
424
425 static enum btrfs_check_mode parse_check_mode(const char *str)
426 {
427         if (strcmp(str, "lowmem") == 0)
428                 return CHECK_MODE_LOWMEM;
429         if (strcmp(str, "orig") == 0)
430                 return CHECK_MODE_ORIGINAL;
431         if (strcmp(str, "original") == 0)
432                 return CHECK_MODE_ORIGINAL;
433
434         return CHECK_MODE_UNKNOWN;
435 }
436
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
439 {
440         struct file_extent_hole *hole;
441
442         if (RB_EMPTY_ROOT(holes))
443                 return (u64)-1;
444
445         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
446         return hole->start;
447 }
448
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
450 {
451         struct file_extent_hole *hole1;
452         struct file_extent_hole *hole2;
453
454         hole1 = rb_entry(node1, struct file_extent_hole, node);
455         hole2 = rb_entry(node2, struct file_extent_hole, node);
456
457         if (hole1->start > hole2->start)
458                 return -1;
459         if (hole1->start < hole2->start)
460                 return 1;
461         /* Now hole1->start == hole2->start */
462         if (hole1->len >= hole2->len)
463                 /*
464                  * Hole 1 will be merge center
465                  * Same hole will be merged later
466                  */
467                 return -1;
468         /* Hole 2 will be merge center */
469         return 1;
470 }
471
472 /*
473  * Add a hole to the record
474  *
475  * This will do hole merge for copy_file_extent_holes(),
476  * which will ensure there won't be continuous holes.
477  */
478 static int add_file_extent_hole(struct rb_root *holes,
479                                 u64 start, u64 len)
480 {
481         struct file_extent_hole *hole;
482         struct file_extent_hole *prev = NULL;
483         struct file_extent_hole *next = NULL;
484
485         hole = malloc(sizeof(*hole));
486         if (!hole)
487                 return -ENOMEM;
488         hole->start = start;
489         hole->len = len;
490         /* Since compare will not return 0, no -EEXIST will happen */
491         rb_insert(holes, &hole->node, compare_hole);
492
493         /* simple merge with previous hole */
494         if (rb_prev(&hole->node))
495                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
496                                 node);
497         if (prev && prev->start + prev->len >= hole->start) {
498                 hole->len = hole->start + hole->len - prev->start;
499                 hole->start = prev->start;
500                 rb_erase(&prev->node, holes);
501                 free(prev);
502                 prev = NULL;
503         }
504
505         /* iterate merge with next holes */
506         while (1) {
507                 if (!rb_next(&hole->node))
508                         break;
509                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
510                                         node);
511                 if (hole->start + hole->len >= next->start) {
512                         if (hole->start + hole->len <= next->start + next->len)
513                                 hole->len = next->start + next->len -
514                                             hole->start;
515                         rb_erase(&next->node, holes);
516                         free(next);
517                         next = NULL;
518                 } else
519                         break;
520         }
521         return 0;
522 }
523
524 static int compare_hole_range(struct rb_node *node, void *data)
525 {
526         struct file_extent_hole *hole;
527         u64 start;
528
529         hole = (struct file_extent_hole *)data;
530         start = hole->start;
531
532         hole = rb_entry(node, struct file_extent_hole, node);
533         if (start < hole->start)
534                 return -1;
535         if (start >= hole->start && start < hole->start + hole->len)
536                 return 0;
537         return 1;
538 }
539
540 /*
541  * Delete a hole in the record
542  *
543  * This will do the hole split and is much restrict than add.
544  */
545 static int del_file_extent_hole(struct rb_root *holes,
546                                 u64 start, u64 len)
547 {
548         struct file_extent_hole *hole;
549         struct file_extent_hole tmp;
550         u64 prev_start = 0;
551         u64 prev_len = 0;
552         u64 next_start = 0;
553         u64 next_len = 0;
554         struct rb_node *node;
555         int have_prev = 0;
556         int have_next = 0;
557         int ret = 0;
558
559         tmp.start = start;
560         tmp.len = len;
561         node = rb_search(holes, &tmp, compare_hole_range, NULL);
562         if (!node)
563                 return -EEXIST;
564         hole = rb_entry(node, struct file_extent_hole, node);
565         if (start + len > hole->start + hole->len)
566                 return -EEXIST;
567
568         /*
569          * Now there will be no overlap, delete the hole and re-add the
570          * split(s) if they exists.
571          */
572         if (start > hole->start) {
573                 prev_start = hole->start;
574                 prev_len = start - hole->start;
575                 have_prev = 1;
576         }
577         if (hole->start + hole->len > start + len) {
578                 next_start = start + len;
579                 next_len = hole->start + hole->len - start - len;
580                 have_next = 1;
581         }
582         rb_erase(node, holes);
583         free(hole);
584         if (have_prev) {
585                 ret = add_file_extent_hole(holes, prev_start, prev_len);
586                 if (ret < 0)
587                         return ret;
588         }
589         if (have_next) {
590                 ret = add_file_extent_hole(holes, next_start, next_len);
591                 if (ret < 0)
592                         return ret;
593         }
594         return 0;
595 }
596
597 static int copy_file_extent_holes(struct rb_root *dst,
598                                   struct rb_root *src)
599 {
600         struct file_extent_hole *hole;
601         struct rb_node *node;
602         int ret = 0;
603
604         node = rb_first(src);
605         while (node) {
606                 hole = rb_entry(node, struct file_extent_hole, node);
607                 ret = add_file_extent_hole(dst, hole->start, hole->len);
608                 if (ret)
609                         break;
610                 node = rb_next(node);
611         }
612         return ret;
613 }
614
615 static void free_file_extent_holes(struct rb_root *holes)
616 {
617         struct rb_node *node;
618         struct file_extent_hole *hole;
619
620         node = rb_first(holes);
621         while (node) {
622                 hole = rb_entry(node, struct file_extent_hole, node);
623                 rb_erase(node, holes);
624                 free(hole);
625                 node = rb_first(holes);
626         }
627 }
628
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
630
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632                                  struct btrfs_root *root)
633 {
634         if (root->last_trans != trans->transid) {
635                 root->track_dirty = 1;
636                 root->last_trans = trans->transid;
637                 root->commit_root = root->node;
638                 extent_buffer_get(root->node);
639         }
640 }
641
642 static u8 imode_to_type(u32 imode)
643 {
644 #define S_SHIFT 12
645         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
647                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
648                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
649                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
650                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
651                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
652                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
653         };
654
655         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
656 #undef S_SHIFT
657 }
658
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
660 {
661         struct device_record *rec1;
662         struct device_record *rec2;
663
664         rec1 = rb_entry(node1, struct device_record, node);
665         rec2 = rb_entry(node2, struct device_record, node);
666         if (rec1->devid > rec2->devid)
667                 return -1;
668         else if (rec1->devid < rec2->devid)
669                 return 1;
670         else
671                 return 0;
672 }
673
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
675 {
676         struct inode_record *rec;
677         struct inode_backref *backref;
678         struct inode_backref *orig;
679         struct inode_backref *tmp;
680         struct orphan_data_extent *src_orphan;
681         struct orphan_data_extent *dst_orphan;
682         struct rb_node *rb;
683         size_t size;
684         int ret;
685
686         rec = malloc(sizeof(*rec));
687         if (!rec)
688                 return ERR_PTR(-ENOMEM);
689         memcpy(rec, orig_rec, sizeof(*rec));
690         rec->refs = 1;
691         INIT_LIST_HEAD(&rec->backrefs);
692         INIT_LIST_HEAD(&rec->orphan_extents);
693         rec->holes = RB_ROOT;
694
695         list_for_each_entry(orig, &orig_rec->backrefs, list) {
696                 size = sizeof(*orig) + orig->namelen + 1;
697                 backref = malloc(size);
698                 if (!backref) {
699                         ret = -ENOMEM;
700                         goto cleanup;
701                 }
702                 memcpy(backref, orig, size);
703                 list_add_tail(&backref->list, &rec->backrefs);
704         }
705         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706                 dst_orphan = malloc(sizeof(*dst_orphan));
707                 if (!dst_orphan) {
708                         ret = -ENOMEM;
709                         goto cleanup;
710                 }
711                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
713         }
714         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
715         if (ret < 0)
716                 goto cleanup_rb;
717
718         return rec;
719
720 cleanup_rb:
721         rb = rb_first(&rec->holes);
722         while (rb) {
723                 struct file_extent_hole *hole;
724
725                 hole = rb_entry(rb, struct file_extent_hole, node);
726                 rb = rb_next(rb);
727                 free(hole);
728         }
729
730 cleanup:
731         if (!list_empty(&rec->backrefs))
732                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733                         list_del(&orig->list);
734                         free(orig);
735                 }
736
737         if (!list_empty(&rec->orphan_extents))
738                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739                         list_del(&orig->list);
740                         free(orig);
741                 }
742
743         free(rec);
744
745         return ERR_PTR(ret);
746 }
747
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
749                                       u64 objectid)
750 {
751         struct orphan_data_extent *orphan;
752
753         if (list_empty(orphan_extents))
754                 return;
755         printf("The following data extent is lost in tree %llu:\n",
756                objectid);
757         list_for_each_entry(orphan, orphan_extents, list) {
758                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
760                        orphan->disk_len);
761         }
762 }
763
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
765 {
766         u64 root_objectid = root->root_key.objectid;
767         int errors = rec->errors;
768
769         if (!errors)
770                 return;
771         /* reloc root errors, we print its corresponding fs root objectid*/
772         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773                 root_objectid = root->root_key.offset;
774                 fprintf(stderr, "reloc");
775         }
776         fprintf(stderr, "root %llu inode %llu errors %x",
777                 (unsigned long long) root_objectid,
778                 (unsigned long long) rec->ino, rec->errors);
779
780         if (errors & I_ERR_NO_INODE_ITEM)
781                 fprintf(stderr, ", no inode item");
782         if (errors & I_ERR_NO_ORPHAN_ITEM)
783                 fprintf(stderr, ", no orphan item");
784         if (errors & I_ERR_DUP_INODE_ITEM)
785                 fprintf(stderr, ", dup inode item");
786         if (errors & I_ERR_DUP_DIR_INDEX)
787                 fprintf(stderr, ", dup dir index");
788         if (errors & I_ERR_ODD_DIR_ITEM)
789                 fprintf(stderr, ", odd dir item");
790         if (errors & I_ERR_ODD_FILE_EXTENT)
791                 fprintf(stderr, ", odd file extent");
792         if (errors & I_ERR_BAD_FILE_EXTENT)
793                 fprintf(stderr, ", bad file extent");
794         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795                 fprintf(stderr, ", file extent overlap");
796         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797                 fprintf(stderr, ", file extent discount");
798         if (errors & I_ERR_DIR_ISIZE_WRONG)
799                 fprintf(stderr, ", dir isize wrong");
800         if (errors & I_ERR_FILE_NBYTES_WRONG)
801                 fprintf(stderr, ", nbytes wrong");
802         if (errors & I_ERR_ODD_CSUM_ITEM)
803                 fprintf(stderr, ", odd csum item");
804         if (errors & I_ERR_SOME_CSUM_MISSING)
805                 fprintf(stderr, ", some csum missing");
806         if (errors & I_ERR_LINK_COUNT_WRONG)
807                 fprintf(stderr, ", link count wrong");
808         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809                 fprintf(stderr, ", orphan file extent");
810         fprintf(stderr, "\n");
811         /* Print the orphan extents if needed */
812         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
814
815         /* Print the holes if needed */
816         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817                 struct file_extent_hole *hole;
818                 struct rb_node *node;
819                 int found = 0;
820
821                 node = rb_first(&rec->holes);
822                 fprintf(stderr, "Found file extent holes:\n");
823                 while (node) {
824                         found = 1;
825                         hole = rb_entry(node, struct file_extent_hole, node);
826                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
827                                 hole->start, hole->len);
828                         node = rb_next(node);
829                 }
830                 if (!found)
831                         fprintf(stderr, "\tstart: 0, len: %llu\n",
832                                 round_up(rec->isize,
833                                          root->fs_info->sectorsize));
834         }
835 }
836
837 static void print_ref_error(int errors)
838 {
839         if (errors & REF_ERR_NO_DIR_ITEM)
840                 fprintf(stderr, ", no dir item");
841         if (errors & REF_ERR_NO_DIR_INDEX)
842                 fprintf(stderr, ", no dir index");
843         if (errors & REF_ERR_NO_INODE_REF)
844                 fprintf(stderr, ", no inode ref");
845         if (errors & REF_ERR_DUP_DIR_ITEM)
846                 fprintf(stderr, ", dup dir item");
847         if (errors & REF_ERR_DUP_DIR_INDEX)
848                 fprintf(stderr, ", dup dir index");
849         if (errors & REF_ERR_DUP_INODE_REF)
850                 fprintf(stderr, ", dup inode ref");
851         if (errors & REF_ERR_INDEX_UNMATCH)
852                 fprintf(stderr, ", index mismatch");
853         if (errors & REF_ERR_FILETYPE_UNMATCH)
854                 fprintf(stderr, ", filetype mismatch");
855         if (errors & REF_ERR_NAME_TOO_LONG)
856                 fprintf(stderr, ", name too long");
857         if (errors & REF_ERR_NO_ROOT_REF)
858                 fprintf(stderr, ", no root ref");
859         if (errors & REF_ERR_NO_ROOT_BACKREF)
860                 fprintf(stderr, ", no root backref");
861         if (errors & REF_ERR_DUP_ROOT_REF)
862                 fprintf(stderr, ", dup root ref");
863         if (errors & REF_ERR_DUP_ROOT_BACKREF)
864                 fprintf(stderr, ", dup root backref");
865         fprintf(stderr, "\n");
866 }
867
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869                                           u64 ino, int mod)
870 {
871         struct ptr_node *node;
872         struct cache_extent *cache;
873         struct inode_record *rec = NULL;
874         int ret;
875
876         cache = lookup_cache_extent(inode_cache, ino, 1);
877         if (cache) {
878                 node = container_of(cache, struct ptr_node, cache);
879                 rec = node->data;
880                 if (mod && rec->refs > 1) {
881                         node->data = clone_inode_rec(rec);
882                         if (IS_ERR(node->data))
883                                 return node->data;
884                         rec->refs--;
885                         rec = node->data;
886                 }
887         } else if (mod) {
888                 rec = calloc(1, sizeof(*rec));
889                 if (!rec)
890                         return ERR_PTR(-ENOMEM);
891                 rec->ino = ino;
892                 rec->extent_start = (u64)-1;
893                 rec->refs = 1;
894                 INIT_LIST_HEAD(&rec->backrefs);
895                 INIT_LIST_HEAD(&rec->orphan_extents);
896                 rec->holes = RB_ROOT;
897
898                 node = malloc(sizeof(*node));
899                 if (!node) {
900                         free(rec);
901                         return ERR_PTR(-ENOMEM);
902                 }
903                 node->cache.start = ino;
904                 node->cache.size = 1;
905                 node->data = rec;
906
907                 if (ino == BTRFS_FREE_INO_OBJECTID)
908                         rec->found_link = 1;
909
910                 ret = insert_cache_extent(inode_cache, &node->cache);
911                 if (ret)
912                         return ERR_PTR(-EEXIST);
913         }
914         return rec;
915 }
916
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 {
919         struct orphan_data_extent *orphan;
920
921         while (!list_empty(orphan_extents)) {
922                 orphan = list_entry(orphan_extents->next,
923                                     struct orphan_data_extent, list);
924                 list_del(&orphan->list);
925                 free(orphan);
926         }
927 }
928
929 static void free_inode_rec(struct inode_record *rec)
930 {
931         struct inode_backref *backref;
932
933         if (--rec->refs > 0)
934                 return;
935
936         while (!list_empty(&rec->backrefs)) {
937                 backref = to_inode_backref(rec->backrefs.next);
938                 list_del(&backref->list);
939                 free(backref);
940         }
941         free_orphan_data_extents(&rec->orphan_extents);
942         free_file_extent_holes(&rec->holes);
943         free(rec);
944 }
945
946 static int can_free_inode_rec(struct inode_record *rec)
947 {
948         if (!rec->errors && rec->checked && rec->found_inode_item &&
949             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
950                 return 1;
951         return 0;
952 }
953
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955                                  struct inode_record *rec)
956 {
957         struct cache_extent *cache;
958         struct inode_backref *tmp, *backref;
959         struct ptr_node *node;
960         u8 filetype;
961
962         if (!rec->found_inode_item)
963                 return;
964
965         filetype = imode_to_type(rec->imode);
966         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967                 if (backref->found_dir_item && backref->found_dir_index) {
968                         if (backref->filetype != filetype)
969                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970                         if (!backref->errors && backref->found_inode_ref &&
971                             rec->nlink == rec->found_link) {
972                                 list_del(&backref->list);
973                                 free(backref);
974                         }
975                 }
976         }
977
978         if (!rec->checked || rec->merging)
979                 return;
980
981         if (S_ISDIR(rec->imode)) {
982                 if (rec->found_size != rec->isize)
983                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984                 if (rec->found_file_extent)
985                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
986         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987                 if (rec->found_dir_item)
988                         rec->errors |= I_ERR_ODD_DIR_ITEM;
989                 if (rec->found_size != rec->nbytes)
990                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991                 if (rec->nlink > 0 && !no_holes &&
992                     (rec->extent_end < rec->isize ||
993                      first_extent_gap(&rec->holes) < rec->isize))
994                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995         }
996
997         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998                 if (rec->found_csum_item && rec->nodatasum)
999                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000                 if (rec->some_csum_missing && !rec->nodatasum)
1001                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002         }
1003
1004         BUG_ON(rec->refs != 1);
1005         if (can_free_inode_rec(rec)) {
1006                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007                 node = container_of(cache, struct ptr_node, cache);
1008                 BUG_ON(node->data != rec);
1009                 remove_cache_extent(inode_cache, &node->cache);
1010                 free(node);
1011                 free_inode_rec(rec);
1012         }
1013 }
1014
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 {
1017         struct btrfs_path path;
1018         struct btrfs_key key;
1019         int ret;
1020
1021         key.objectid = BTRFS_ORPHAN_OBJECTID;
1022         key.type = BTRFS_ORPHAN_ITEM_KEY;
1023         key.offset = ino;
1024
1025         btrfs_init_path(&path);
1026         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027         btrfs_release_path(&path);
1028         if (ret > 0)
1029                 ret = -ENOENT;
1030         return ret;
1031 }
1032
1033 static int process_inode_item(struct extent_buffer *eb,
1034                               int slot, struct btrfs_key *key,
1035                               struct shared_node *active_node)
1036 {
1037         struct inode_record *rec;
1038         struct btrfs_inode_item *item;
1039
1040         rec = active_node->current;
1041         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042         if (rec->found_inode_item) {
1043                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044                 return 1;
1045         }
1046         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047         rec->nlink = btrfs_inode_nlink(eb, item);
1048         rec->isize = btrfs_inode_size(eb, item);
1049         rec->nbytes = btrfs_inode_nbytes(eb, item);
1050         rec->imode = btrfs_inode_mode(eb, item);
1051         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052                 rec->nodatasum = 1;
1053         rec->found_inode_item = 1;
1054         if (rec->nlink == 0)
1055                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056         maybe_free_inode_rec(&active_node->inode_cache, rec);
1057         return 0;
1058 }
1059
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061                                                 const char *name,
1062                                                 int namelen, u64 dir)
1063 {
1064         struct inode_backref *backref;
1065
1066         list_for_each_entry(backref, &rec->backrefs, list) {
1067                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068                         break;
1069                 if (backref->dir != dir || backref->namelen != namelen)
1070                         continue;
1071                 if (memcmp(name, backref->name, namelen))
1072                         continue;
1073                 return backref;
1074         }
1075
1076         backref = malloc(sizeof(*backref) + namelen + 1);
1077         if (!backref)
1078                 return NULL;
1079         memset(backref, 0, sizeof(*backref));
1080         backref->dir = dir;
1081         backref->namelen = namelen;
1082         memcpy(backref->name, name, namelen);
1083         backref->name[namelen] = '\0';
1084         list_add_tail(&backref->list, &rec->backrefs);
1085         return backref;
1086 }
1087
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089                              u64 ino, u64 dir, u64 index,
1090                              const char *name, int namelen,
1091                              u8 filetype, u8 itemtype, int errors)
1092 {
1093         struct inode_record *rec;
1094         struct inode_backref *backref;
1095
1096         rec = get_inode_rec(inode_cache, ino, 1);
1097         BUG_ON(IS_ERR(rec));
1098         backref = get_inode_backref(rec, name, namelen, dir);
1099         BUG_ON(!backref);
1100         if (errors)
1101                 backref->errors |= errors;
1102         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103                 if (backref->found_dir_index)
1104                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105                 if (backref->found_inode_ref && backref->index != index)
1106                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1107                 if (backref->found_dir_item && backref->filetype != filetype)
1108                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109
1110                 backref->index = index;
1111                 backref->filetype = filetype;
1112                 backref->found_dir_index = 1;
1113         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114                 rec->found_link++;
1115                 if (backref->found_dir_item)
1116                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117                 if (backref->found_dir_index && backref->filetype != filetype)
1118                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119
1120                 backref->filetype = filetype;
1121                 backref->found_dir_item = 1;
1122         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124                 if (backref->found_inode_ref)
1125                         backref->errors |= REF_ERR_DUP_INODE_REF;
1126                 if (backref->found_dir_index && backref->index != index)
1127                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1128                 else
1129                         backref->index = index;
1130
1131                 backref->ref_type = itemtype;
1132                 backref->found_inode_ref = 1;
1133         } else {
1134                 BUG_ON(1);
1135         }
1136
1137         maybe_free_inode_rec(inode_cache, rec);
1138         return 0;
1139 }
1140
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142                             struct cache_tree *dst_cache)
1143 {
1144         struct inode_backref *backref;
1145         u32 dir_count = 0;
1146         int ret = 0;
1147
1148         dst->merging = 1;
1149         list_for_each_entry(backref, &src->backrefs, list) {
1150                 if (backref->found_dir_index) {
1151                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1152                                         backref->index, backref->name,
1153                                         backref->namelen, backref->filetype,
1154                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1155                 }
1156                 if (backref->found_dir_item) {
1157                         dir_count++;
1158                         add_inode_backref(dst_cache, dst->ino,
1159                                         backref->dir, 0, backref->name,
1160                                         backref->namelen, backref->filetype,
1161                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1162                 }
1163                 if (backref->found_inode_ref) {
1164                         add_inode_backref(dst_cache, dst->ino,
1165                                         backref->dir, backref->index,
1166                                         backref->name, backref->namelen, 0,
1167                                         backref->ref_type, backref->errors);
1168                 }
1169         }
1170
1171         if (src->found_dir_item)
1172                 dst->found_dir_item = 1;
1173         if (src->found_file_extent)
1174                 dst->found_file_extent = 1;
1175         if (src->found_csum_item)
1176                 dst->found_csum_item = 1;
1177         if (src->some_csum_missing)
1178                 dst->some_csum_missing = 1;
1179         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1181                 if (ret < 0)
1182                         return ret;
1183         }
1184
1185         BUG_ON(src->found_link < dir_count);
1186         dst->found_link += src->found_link - dir_count;
1187         dst->found_size += src->found_size;
1188         if (src->extent_start != (u64)-1) {
1189                 if (dst->extent_start == (u64)-1) {
1190                         dst->extent_start = src->extent_start;
1191                         dst->extent_end = src->extent_end;
1192                 } else {
1193                         if (dst->extent_end > src->extent_start)
1194                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195                         else if (dst->extent_end < src->extent_start) {
1196                                 ret = add_file_extent_hole(&dst->holes,
1197                                         dst->extent_end,
1198                                         src->extent_start - dst->extent_end);
1199                         }
1200                         if (dst->extent_end < src->extent_end)
1201                                 dst->extent_end = src->extent_end;
1202                 }
1203         }
1204
1205         dst->errors |= src->errors;
1206         if (src->found_inode_item) {
1207                 if (!dst->found_inode_item) {
1208                         dst->nlink = src->nlink;
1209                         dst->isize = src->isize;
1210                         dst->nbytes = src->nbytes;
1211                         dst->imode = src->imode;
1212                         dst->nodatasum = src->nodatasum;
1213                         dst->found_inode_item = 1;
1214                 } else {
1215                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1216                 }
1217         }
1218         dst->merging = 0;
1219
1220         return 0;
1221 }
1222
1223 static int splice_shared_node(struct shared_node *src_node,
1224                               struct shared_node *dst_node)
1225 {
1226         struct cache_extent *cache;
1227         struct ptr_node *node, *ins;
1228         struct cache_tree *src, *dst;
1229         struct inode_record *rec, *conflict;
1230         u64 current_ino = 0;
1231         int splice = 0;
1232         int ret;
1233
1234         if (--src_node->refs == 0)
1235                 splice = 1;
1236         if (src_node->current)
1237                 current_ino = src_node->current->ino;
1238
1239         src = &src_node->root_cache;
1240         dst = &dst_node->root_cache;
1241 again:
1242         cache = search_cache_extent(src, 0);
1243         while (cache) {
1244                 node = container_of(cache, struct ptr_node, cache);
1245                 rec = node->data;
1246                 cache = next_cache_extent(cache);
1247
1248                 if (splice) {
1249                         remove_cache_extent(src, &node->cache);
1250                         ins = node;
1251                 } else {
1252                         ins = malloc(sizeof(*ins));
1253                         BUG_ON(!ins);
1254                         ins->cache.start = node->cache.start;
1255                         ins->cache.size = node->cache.size;
1256                         ins->data = rec;
1257                         rec->refs++;
1258                 }
1259                 ret = insert_cache_extent(dst, &ins->cache);
1260                 if (ret == -EEXIST) {
1261                         conflict = get_inode_rec(dst, rec->ino, 1);
1262                         BUG_ON(IS_ERR(conflict));
1263                         merge_inode_recs(rec, conflict, dst);
1264                         if (rec->checked) {
1265                                 conflict->checked = 1;
1266                                 if (dst_node->current == conflict)
1267                                         dst_node->current = NULL;
1268                         }
1269                         maybe_free_inode_rec(dst, conflict);
1270                         free_inode_rec(rec);
1271                         free(ins);
1272                 } else {
1273                         BUG_ON(ret);
1274                 }
1275         }
1276
1277         if (src == &src_node->root_cache) {
1278                 src = &src_node->inode_cache;
1279                 dst = &dst_node->inode_cache;
1280                 goto again;
1281         }
1282
1283         if (current_ino > 0 && (!dst_node->current ||
1284             current_ino > dst_node->current->ino)) {
1285                 if (dst_node->current) {
1286                         dst_node->current->checked = 1;
1287                         maybe_free_inode_rec(dst, dst_node->current);
1288                 }
1289                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290                 BUG_ON(IS_ERR(dst_node->current));
1291         }
1292         return 0;
1293 }
1294
1295 static void free_inode_ptr(struct cache_extent *cache)
1296 {
1297         struct ptr_node *node;
1298         struct inode_record *rec;
1299
1300         node = container_of(cache, struct ptr_node, cache);
1301         rec = node->data;
1302         free_inode_rec(rec);
1303         free(node);
1304 }
1305
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309                                             u64 bytenr)
1310 {
1311         struct cache_extent *cache;
1312         struct shared_node *node;
1313
1314         cache = lookup_cache_extent(shared, bytenr, 1);
1315         if (cache) {
1316                 node = container_of(cache, struct shared_node, cache);
1317                 return node;
1318         }
1319         return NULL;
1320 }
1321
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 {
1324         int ret;
1325         struct shared_node *node;
1326
1327         node = calloc(1, sizeof(*node));
1328         if (!node)
1329                 return -ENOMEM;
1330         node->cache.start = bytenr;
1331         node->cache.size = 1;
1332         cache_tree_init(&node->root_cache);
1333         cache_tree_init(&node->inode_cache);
1334         node->refs = refs;
1335
1336         ret = insert_cache_extent(shared, &node->cache);
1337
1338         return ret;
1339 }
1340
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342                              struct walk_control *wc, int level)
1343 {
1344         struct shared_node *node;
1345         struct shared_node *dest;
1346         int ret;
1347
1348         if (level == wc->active_node)
1349                 return 0;
1350
1351         BUG_ON(wc->active_node <= level);
1352         node = find_shared_node(&wc->shared, bytenr);
1353         if (!node) {
1354                 ret = add_shared_node(&wc->shared, bytenr, refs);
1355                 BUG_ON(ret);
1356                 node = find_shared_node(&wc->shared, bytenr);
1357                 wc->nodes[level] = node;
1358                 wc->active_node = level;
1359                 return 0;
1360         }
1361
1362         if (wc->root_level == wc->active_node &&
1363             btrfs_root_refs(&root->root_item) == 0) {
1364                 if (--node->refs == 0) {
1365                         free_inode_recs_tree(&node->root_cache);
1366                         free_inode_recs_tree(&node->inode_cache);
1367                         remove_cache_extent(&wc->shared, &node->cache);
1368                         free(node);
1369                 }
1370                 return 1;
1371         }
1372
1373         dest = wc->nodes[wc->active_node];
1374         splice_shared_node(node, dest);
1375         if (node->refs == 0) {
1376                 remove_cache_extent(&wc->shared, &node->cache);
1377                 free(node);
1378         }
1379         return 1;
1380 }
1381
1382 static int leave_shared_node(struct btrfs_root *root,
1383                              struct walk_control *wc, int level)
1384 {
1385         struct shared_node *node;
1386         struct shared_node *dest;
1387         int i;
1388
1389         if (level == wc->root_level)
1390                 return 0;
1391
1392         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1393                 if (wc->nodes[i])
1394                         break;
1395         }
1396         BUG_ON(i >= BTRFS_MAX_LEVEL);
1397
1398         node = wc->nodes[wc->active_node];
1399         wc->nodes[wc->active_node] = NULL;
1400         wc->active_node = i;
1401
1402         dest = wc->nodes[wc->active_node];
1403         if (wc->active_node < wc->root_level ||
1404             btrfs_root_refs(&root->root_item) > 0) {
1405                 BUG_ON(node->refs <= 1);
1406                 splice_shared_node(node, dest);
1407         } else {
1408                 BUG_ON(node->refs < 2);
1409                 node->refs--;
1410         }
1411         return 0;
1412 }
1413
1414 /*
1415  * Returns:
1416  * < 0 - on error
1417  * 1   - if the root with id child_root_id is a child of root parent_root_id
1418  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1419  *       has other root(s) as parent(s)
1420  * 2   - if the root child_root_id doesn't have any parent roots
1421  */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423                          u64 child_root_id)
1424 {
1425         struct btrfs_path path;
1426         struct btrfs_key key;
1427         struct extent_buffer *leaf;
1428         int has_parent = 0;
1429         int ret;
1430
1431         btrfs_init_path(&path);
1432
1433         key.objectid = parent_root_id;
1434         key.type = BTRFS_ROOT_REF_KEY;
1435         key.offset = child_root_id;
1436         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1437                                 0, 0);
1438         if (ret < 0)
1439                 return ret;
1440         btrfs_release_path(&path);
1441         if (!ret)
1442                 return 1;
1443
1444         key.objectid = child_root_id;
1445         key.type = BTRFS_ROOT_BACKREF_KEY;
1446         key.offset = 0;
1447         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1448                                 0, 0);
1449         if (ret < 0)
1450                 goto out;
1451
1452         while (1) {
1453                 leaf = path.nodes[0];
1454                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456                         if (ret)
1457                                 break;
1458                         leaf = path.nodes[0];
1459                 }
1460
1461                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462                 if (key.objectid != child_root_id ||
1463                     key.type != BTRFS_ROOT_BACKREF_KEY)
1464                         break;
1465
1466                 has_parent = 1;
1467
1468                 if (key.offset == parent_root_id) {
1469                         btrfs_release_path(&path);
1470                         return 1;
1471                 }
1472
1473                 path.slots[0]++;
1474         }
1475 out:
1476         btrfs_release_path(&path);
1477         if (ret < 0)
1478                 return ret;
1479         return has_parent ? 0 : 2;
1480 }
1481
1482 static int process_dir_item(struct extent_buffer *eb,
1483                             int slot, struct btrfs_key *key,
1484                             struct shared_node *active_node)
1485 {
1486         u32 total;
1487         u32 cur = 0;
1488         u32 len;
1489         u32 name_len;
1490         u32 data_len;
1491         int error;
1492         int nritems = 0;
1493         u8 filetype;
1494         struct btrfs_dir_item *di;
1495         struct inode_record *rec;
1496         struct cache_tree *root_cache;
1497         struct cache_tree *inode_cache;
1498         struct btrfs_key location;
1499         char namebuf[BTRFS_NAME_LEN];
1500
1501         root_cache = &active_node->root_cache;
1502         inode_cache = &active_node->inode_cache;
1503         rec = active_node->current;
1504         rec->found_dir_item = 1;
1505
1506         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507         total = btrfs_item_size_nr(eb, slot);
1508         while (cur < total) {
1509                 nritems++;
1510                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511                 name_len = btrfs_dir_name_len(eb, di);
1512                 data_len = btrfs_dir_data_len(eb, di);
1513                 filetype = btrfs_dir_type(eb, di);
1514
1515                 rec->found_size += name_len;
1516                 if (cur + sizeof(*di) + name_len > total ||
1517                     name_len > BTRFS_NAME_LEN) {
1518                         error = REF_ERR_NAME_TOO_LONG;
1519
1520                         if (cur + sizeof(*di) > total)
1521                                 break;
1522                         len = min_t(u32, total - cur - sizeof(*di),
1523                                     BTRFS_NAME_LEN);
1524                 } else {
1525                         len = name_len;
1526                         error = 0;
1527                 }
1528
1529                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530
1531                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1532                     key->offset != btrfs_name_hash(namebuf, len)) {
1533                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1534                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1535                         key->objectid, key->offset, namebuf, len, filetype,
1536                         key->offset, btrfs_name_hash(namebuf, len));
1537                 }
1538
1539                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1540                         add_inode_backref(inode_cache, location.objectid,
1541                                           key->objectid, key->offset, namebuf,
1542                                           len, filetype, key->type, error);
1543                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1544                         add_inode_backref(root_cache, location.objectid,
1545                                           key->objectid, key->offset,
1546                                           namebuf, len, filetype,
1547                                           key->type, error);
1548                 } else {
1549                         fprintf(stderr, "invalid location in dir item %u\n",
1550                                 location.type);
1551                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1552                                           key->objectid, key->offset, namebuf,
1553                                           len, filetype, key->type, error);
1554                 }
1555
1556                 len = sizeof(*di) + name_len + data_len;
1557                 di = (struct btrfs_dir_item *)((char *)di + len);
1558                 cur += len;
1559         }
1560         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1561                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1562
1563         return 0;
1564 }
1565
1566 static int process_inode_ref(struct extent_buffer *eb,
1567                              int slot, struct btrfs_key *key,
1568                              struct shared_node *active_node)
1569 {
1570         u32 total;
1571         u32 cur = 0;
1572         u32 len;
1573         u32 name_len;
1574         u64 index;
1575         int error;
1576         struct cache_tree *inode_cache;
1577         struct btrfs_inode_ref *ref;
1578         char namebuf[BTRFS_NAME_LEN];
1579
1580         inode_cache = &active_node->inode_cache;
1581
1582         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1583         total = btrfs_item_size_nr(eb, slot);
1584         while (cur < total) {
1585                 name_len = btrfs_inode_ref_name_len(eb, ref);
1586                 index = btrfs_inode_ref_index(eb, ref);
1587
1588                 /* inode_ref + namelen should not cross item boundary */
1589                 if (cur + sizeof(*ref) + name_len > total ||
1590                     name_len > BTRFS_NAME_LEN) {
1591                         if (total < cur + sizeof(*ref))
1592                                 break;
1593
1594                         /* Still try to read out the remaining part */
1595                         len = min_t(u32, total - cur - sizeof(*ref),
1596                                     BTRFS_NAME_LEN);
1597                         error = REF_ERR_NAME_TOO_LONG;
1598                 } else {
1599                         len = name_len;
1600                         error = 0;
1601                 }
1602
1603                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, key->offset,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*ref) + name_len;
1608                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612 }
1613
1614 static int process_inode_extref(struct extent_buffer *eb,
1615                                 int slot, struct btrfs_key *key,
1616                                 struct shared_node *active_node)
1617 {
1618         u32 total;
1619         u32 cur = 0;
1620         u32 len;
1621         u32 name_len;
1622         u64 index;
1623         u64 parent;
1624         int error;
1625         struct cache_tree *inode_cache;
1626         struct btrfs_inode_extref *extref;
1627         char namebuf[BTRFS_NAME_LEN];
1628
1629         inode_cache = &active_node->inode_cache;
1630
1631         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1632         total = btrfs_item_size_nr(eb, slot);
1633         while (cur < total) {
1634                 name_len = btrfs_inode_extref_name_len(eb, extref);
1635                 index = btrfs_inode_extref_index(eb, extref);
1636                 parent = btrfs_inode_extref_parent(eb, extref);
1637                 if (name_len <= BTRFS_NAME_LEN) {
1638                         len = name_len;
1639                         error = 0;
1640                 } else {
1641                         len = BTRFS_NAME_LEN;
1642                         error = REF_ERR_NAME_TOO_LONG;
1643                 }
1644                 read_extent_buffer(eb, namebuf,
1645                                    (unsigned long)(extref + 1), len);
1646                 add_inode_backref(inode_cache, key->objectid, parent,
1647                                   index, namebuf, len, 0, key->type, error);
1648
1649                 len = sizeof(*extref) + name_len;
1650                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1651                 cur += len;
1652         }
1653         return 0;
1654
1655 }
1656
1657 static int count_csum_range(struct btrfs_root *root, u64 start,
1658                             u64 len, u64 *found)
1659 {
1660         struct btrfs_key key;
1661         struct btrfs_path path;
1662         struct extent_buffer *leaf;
1663         int ret;
1664         size_t size;
1665         *found = 0;
1666         u64 csum_end;
1667         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1668
1669         btrfs_init_path(&path);
1670
1671         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1672         key.offset = start;
1673         key.type = BTRFS_EXTENT_CSUM_KEY;
1674
1675         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1676                                 &key, &path, 0, 0);
1677         if (ret < 0)
1678                 goto out;
1679         if (ret > 0 && path.slots[0] > 0) {
1680                 leaf = path.nodes[0];
1681                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1682                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1683                     key.type == BTRFS_EXTENT_CSUM_KEY)
1684                         path.slots[0]--;
1685         }
1686
1687         while (len > 0) {
1688                 leaf = path.nodes[0];
1689                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1690                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1691                         if (ret > 0)
1692                                 break;
1693                         else if (ret < 0)
1694                                 goto out;
1695                         leaf = path.nodes[0];
1696                 }
1697
1698                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1699                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1700                     key.type != BTRFS_EXTENT_CSUM_KEY)
1701                         break;
1702
1703                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1704                 if (key.offset >= start + len)
1705                         break;
1706
1707                 if (key.offset > start)
1708                         start = key.offset;
1709
1710                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1711                 csum_end = key.offset + (size / csum_size) *
1712                            root->fs_info->sectorsize;
1713                 if (csum_end > start) {
1714                         size = min(csum_end - start, len);
1715                         len -= size;
1716                         start += size;
1717                         *found += size;
1718                 }
1719
1720                 path.slots[0]++;
1721         }
1722 out:
1723         btrfs_release_path(&path);
1724         if (ret < 0)
1725                 return ret;
1726         return 0;
1727 }
1728
1729 static int process_file_extent(struct btrfs_root *root,
1730                                 struct extent_buffer *eb,
1731                                 int slot, struct btrfs_key *key,
1732                                 struct shared_node *active_node)
1733 {
1734         struct inode_record *rec;
1735         struct btrfs_file_extent_item *fi;
1736         u64 num_bytes = 0;
1737         u64 disk_bytenr = 0;
1738         u64 extent_offset = 0;
1739         u64 mask = root->fs_info->sectorsize - 1;
1740         int extent_type;
1741         int ret;
1742
1743         rec = active_node->current;
1744         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1745         rec->found_file_extent = 1;
1746
1747         if (rec->extent_start == (u64)-1) {
1748                 rec->extent_start = key->offset;
1749                 rec->extent_end = key->offset;
1750         }
1751
1752         if (rec->extent_end > key->offset)
1753                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1754         else if (rec->extent_end < key->offset) {
1755                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1756                                            key->offset - rec->extent_end);
1757                 if (ret < 0)
1758                         return ret;
1759         }
1760
1761         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1762         extent_type = btrfs_file_extent_type(eb, fi);
1763
1764         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1765                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1766                 if (num_bytes == 0)
1767                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1768                 rec->found_size += num_bytes;
1769                 num_bytes = (num_bytes + mask) & ~mask;
1770         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1771                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1772                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1773                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1774                 extent_offset = btrfs_file_extent_offset(eb, fi);
1775                 if (num_bytes == 0 || (num_bytes & mask))
1776                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777                 if (num_bytes + extent_offset >
1778                     btrfs_file_extent_ram_bytes(eb, fi))
1779                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1780                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1781                     (btrfs_file_extent_compression(eb, fi) ||
1782                      btrfs_file_extent_encryption(eb, fi) ||
1783                      btrfs_file_extent_other_encoding(eb, fi)))
1784                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1785                 if (disk_bytenr > 0)
1786                         rec->found_size += num_bytes;
1787         } else {
1788                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1789         }
1790         rec->extent_end = key->offset + num_bytes;
1791
1792         /*
1793          * The data reloc tree will copy full extents into its inode and then
1794          * copy the corresponding csums.  Because the extent it copied could be
1795          * a preallocated extent that hasn't been written to yet there may be no
1796          * csums to copy, ergo we won't have csums for our file extent.  This is
1797          * ok so just don't bother checking csums if the inode belongs to the
1798          * data reloc tree.
1799          */
1800         if (disk_bytenr > 0 &&
1801             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1802                 u64 found;
1803                 if (btrfs_file_extent_compression(eb, fi))
1804                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1805                 else
1806                         disk_bytenr += extent_offset;
1807
1808                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1809                 if (ret < 0)
1810                         return ret;
1811                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1812                         if (found > 0)
1813                                 rec->found_csum_item = 1;
1814                         if (found < num_bytes)
1815                                 rec->some_csum_missing = 1;
1816                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1817                         if (found > 0)
1818                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1819                 }
1820         }
1821         return 0;
1822 }
1823
1824 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1825                             struct walk_control *wc)
1826 {
1827         struct btrfs_key key;
1828         u32 nritems;
1829         int i;
1830         int ret = 0;
1831         struct cache_tree *inode_cache;
1832         struct shared_node *active_node;
1833
1834         if (wc->root_level == wc->active_node &&
1835             btrfs_root_refs(&root->root_item) == 0)
1836                 return 0;
1837
1838         active_node = wc->nodes[wc->active_node];
1839         inode_cache = &active_node->inode_cache;
1840         nritems = btrfs_header_nritems(eb);
1841         for (i = 0; i < nritems; i++) {
1842                 btrfs_item_key_to_cpu(eb, &key, i);
1843
1844                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1845                         continue;
1846                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1847                         continue;
1848
1849                 if (active_node->current == NULL ||
1850                     active_node->current->ino < key.objectid) {
1851                         if (active_node->current) {
1852                                 active_node->current->checked = 1;
1853                                 maybe_free_inode_rec(inode_cache,
1854                                                      active_node->current);
1855                         }
1856                         active_node->current = get_inode_rec(inode_cache,
1857                                                              key.objectid, 1);
1858                         BUG_ON(IS_ERR(active_node->current));
1859                 }
1860                 switch (key.type) {
1861                 case BTRFS_DIR_ITEM_KEY:
1862                 case BTRFS_DIR_INDEX_KEY:
1863                         ret = process_dir_item(eb, i, &key, active_node);
1864                         break;
1865                 case BTRFS_INODE_REF_KEY:
1866                         ret = process_inode_ref(eb, i, &key, active_node);
1867                         break;
1868                 case BTRFS_INODE_EXTREF_KEY:
1869                         ret = process_inode_extref(eb, i, &key, active_node);
1870                         break;
1871                 case BTRFS_INODE_ITEM_KEY:
1872                         ret = process_inode_item(eb, i, &key, active_node);
1873                         break;
1874                 case BTRFS_EXTENT_DATA_KEY:
1875                         ret = process_file_extent(root, eb, i, &key,
1876                                                   active_node);
1877                         break;
1878                 default:
1879                         break;
1880                 };
1881         }
1882         return ret;
1883 }
1884
/*
 * Per-level cache of extent reference information for tree blocks, indexed
 * by tree level, so a block that was just looked up is not looked up again.
 */
struct node_refs {
	u64 bytenr[BTRFS_MAX_LEVEL];	/* start of the block cached at each level */
	u64 refs[BTRFS_MAX_LEVEL];	/* reference count found for that block */
	int need_check[BTRFS_MAX_LEVEL];	/* non-zero if the block must be checked for the current root */
};
1890
1891 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1892                              struct node_refs *nrefs, u64 level);
1893 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1894                             unsigned int ext_ref);
1895
1896 /*
1897  * Returns >0  Found error, not fatal, should continue
1898  * Returns <0  Fatal error, must exit the whole check
1899  * Returns 0   No errors found
1900  */
1901 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1902                                struct node_refs *nrefs, int *level, int ext_ref)
1903 {
1904         struct extent_buffer *cur = path->nodes[0];
1905         struct btrfs_key key;
1906         u64 cur_bytenr;
1907         u32 nritems;
1908         u64 first_ino = 0;
1909         int root_level = btrfs_header_level(root->node);
1910         int i;
1911         int ret = 0; /* Final return value */
1912         int err = 0; /* Positive error bitmap */
1913
1914         cur_bytenr = cur->start;
1915
1916         /* skip to first inode item or the first inode number change */
1917         nritems = btrfs_header_nritems(cur);
1918         for (i = 0; i < nritems; i++) {
1919                 btrfs_item_key_to_cpu(cur, &key, i);
1920                 if (i == 0)
1921                         first_ino = key.objectid;
1922                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1923                     (first_ino && first_ino != key.objectid))
1924                         break;
1925         }
1926         if (i == nritems) {
1927                 path->slots[0] = nritems;
1928                 return 0;
1929         }
1930         path->slots[0] = i;
1931
1932 again:
1933         err |= check_inode_item(root, path, ext_ref);
1934
1935         if (err & LAST_ITEM)
1936                 goto out;
1937
1938         /* still have inode items in thie leaf */
1939         if (cur->start == cur_bytenr)
1940                 goto again;
1941
1942         /*
1943          * we have switched to another leaf, above nodes may
1944          * have changed, here walk down the path, if a node
1945          * or leaf is shared, check whether we can skip this
1946          * node or leaf.
1947          */
1948         for (i = root_level; i >= 0; i--) {
1949                 if (path->nodes[i]->start == nrefs->bytenr[i])
1950                         continue;
1951
1952                 ret = update_nodes_refs(root,
1953                                 path->nodes[i]->start,
1954                                 nrefs, i);
1955                 if (ret)
1956                         goto out;
1957
1958                 if (!nrefs->need_check[i]) {
1959                         *level += 1;
1960                         break;
1961                 }
1962         }
1963
1964         for (i = 0; i < *level; i++) {
1965                 free_extent_buffer(path->nodes[i]);
1966                 path->nodes[i] = NULL;
1967         }
1968 out:
1969         err &= ~LAST_ITEM;
1970         if (err && !ret)
1971                 ret = err;
1972         return ret;
1973 }
1974
1975 static void reada_walk_down(struct btrfs_root *root,
1976                             struct extent_buffer *node, int slot)
1977 {
1978         struct btrfs_fs_info *fs_info = root->fs_info;
1979         u64 bytenr;
1980         u64 ptr_gen;
1981         u32 nritems;
1982         int i;
1983         int level;
1984
1985         level = btrfs_header_level(node);
1986         if (level != 1)
1987                 return;
1988
1989         nritems = btrfs_header_nritems(node);
1990         for (i = slot; i < nritems; i++) {
1991                 bytenr = btrfs_node_blockptr(node, i);
1992                 ptr_gen = btrfs_node_ptr_generation(node, i);
1993                 readahead_tree_block(fs_info, bytenr, fs_info->nodesize,
1994                                 ptr_gen);
1995         }
1996 }
1997
1998 /*
1999  * Check the child node/leaf by the following condition:
2000  * 1. the first item key of the node/leaf should be the same with the one
2001  *    in parent.
2002  * 2. block in parent node should match the child node/leaf.
2003  * 3. generation of parent node and child's header should be consistent.
2004  *
2005  * Or the child node/leaf pointed by the key in parent is not valid.
2006  *
2007  * We hope to check leaf owner too, but since subvol may share leaves,
2008  * which makes leaf owner check not so strong, key check should be
2009  * sufficient enough for that case.
2010  */
2011 static int check_child_node(struct extent_buffer *parent, int slot,
2012                             struct extent_buffer *child)
2013 {
2014         struct btrfs_key parent_key;
2015         struct btrfs_key child_key;
2016         int ret = 0;
2017
2018         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2019         if (btrfs_header_level(child) == 0)
2020                 btrfs_item_key_to_cpu(child, &child_key, 0);
2021         else
2022                 btrfs_node_key_to_cpu(child, &child_key, 0);
2023
2024         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2025                 ret = -EINVAL;
2026                 fprintf(stderr,
2027                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2028                         parent_key.objectid, parent_key.type, parent_key.offset,
2029                         child_key.objectid, child_key.type, child_key.offset);
2030         }
2031         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2032                 ret = -EINVAL;
2033                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2034                         btrfs_node_blockptr(parent, slot),
2035                         btrfs_header_bytenr(child));
2036         }
2037         if (btrfs_node_ptr_generation(parent, slot) !=
2038             btrfs_header_generation(child)) {
2039                 ret = -EINVAL;
2040                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2041                         btrfs_header_generation(child),
2042                         btrfs_node_ptr_generation(parent, slot));
2043         }
2044         return ret;
2045 }
2046
2047 /*
2048  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2049  * in every fs or file tree check. Here we find its all root ids, and only check
2050  * it in the fs or file tree which has the smallest root id.
2051  */
2052 static int need_check(struct btrfs_root *root, struct ulist *roots)
2053 {
2054         struct rb_node *node;
2055         struct ulist_node *u;
2056
2057         if (roots->nnodes == 1)
2058                 return 1;
2059
2060         node = rb_first(&roots->root);
2061         u = rb_entry(node, struct ulist_node, rb_node);
2062         /*
2063          * current root id is not smallest, we skip it and let it be checked
2064          * in the fs or file tree who hash the smallest root id.
2065          */
2066         if (root->objectid != u->val)
2067                 return 0;
2068
2069         return 1;
2070 }
2071
2072 /*
2073  * for a tree node or leaf, we record its reference count, so later if we still
2074  * process this node or leaf, don't need to compute its reference count again.
2075  */
2076 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2077                              struct node_refs *nrefs, u64 level)
2078 {
2079         int check, ret;
2080         u64 refs;
2081         struct ulist *roots;
2082
2083         if (nrefs->bytenr[level] != bytenr) {
2084                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2085                                        level, 1, &refs, NULL);
2086                 if (ret < 0)
2087                         return ret;
2088
2089                 nrefs->bytenr[level] = bytenr;
2090                 nrefs->refs[level] = refs;
2091                 if (refs > 1) {
2092                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2093                                                    0, &roots);
2094                         if (ret)
2095                                 return -EIO;
2096
2097                         check = need_check(root, roots);
2098                         ulist_free(roots);
2099                         nrefs->need_check[level] = check;
2100                 } else {
2101                         nrefs->need_check[level] = 1;
2102                 }
2103         }
2104
2105         return 0;
2106 }
2107
2108 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2109                           struct walk_control *wc, int *level,
2110                           struct node_refs *nrefs)
2111 {
2112         enum btrfs_tree_block_status status;
2113         u64 bytenr;
2114         u64 ptr_gen;
2115         struct btrfs_fs_info *fs_info = root->fs_info;
2116         struct extent_buffer *next;
2117         struct extent_buffer *cur;
2118         int ret, err = 0;
2119         u64 refs;
2120
2121         WARN_ON(*level < 0);
2122         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2123
2124         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2125                 refs = nrefs->refs[*level];
2126                 ret = 0;
2127         } else {
2128                 ret = btrfs_lookup_extent_info(NULL, root,
2129                                        path->nodes[*level]->start,
2130                                        *level, 1, &refs, NULL);
2131                 if (ret < 0) {
2132                         err = ret;
2133                         goto out;
2134                 }
2135                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2136                 nrefs->refs[*level] = refs;
2137         }
2138
2139         if (refs > 1) {
2140                 ret = enter_shared_node(root, path->nodes[*level]->start,
2141                                         refs, wc, *level);
2142                 if (ret > 0) {
2143                         err = ret;
2144                         goto out;
2145                 }
2146         }
2147
2148         while (*level >= 0) {
2149                 WARN_ON(*level < 0);
2150                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2151                 cur = path->nodes[*level];
2152
2153                 if (btrfs_header_level(cur) != *level)
2154                         WARN_ON(1);
2155
2156                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2157                         break;
2158                 if (*level == 0) {
2159                         ret = process_one_leaf(root, cur, wc);
2160                         if (ret < 0)
2161                                 err = ret;
2162                         break;
2163                 }
2164                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2165                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2166
2167                 if (bytenr == nrefs->bytenr[*level - 1]) {
2168                         refs = nrefs->refs[*level - 1];
2169                 } else {
2170                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2171                                         *level - 1, 1, &refs, NULL);
2172                         if (ret < 0) {
2173                                 refs = 0;
2174                         } else {
2175                                 nrefs->bytenr[*level - 1] = bytenr;
2176                                 nrefs->refs[*level - 1] = refs;
2177                         }
2178                 }
2179
2180                 if (refs > 1) {
2181                         ret = enter_shared_node(root, bytenr, refs,
2182                                                 wc, *level - 1);
2183                         if (ret > 0) {
2184                                 path->slots[*level]++;
2185                                 continue;
2186                         }
2187                 }
2188
2189                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2190                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2191                         free_extent_buffer(next);
2192                         reada_walk_down(root, cur, path->slots[*level]);
2193                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2194                         if (!extent_buffer_uptodate(next)) {
2195                                 struct btrfs_key node_key;
2196
2197                                 btrfs_node_key_to_cpu(path->nodes[*level],
2198                                                       &node_key,
2199                                                       path->slots[*level]);
2200                                 btrfs_add_corrupt_extent_record(root->fs_info,
2201                                                 &node_key,
2202                                                 path->nodes[*level]->start,
2203                                                 root->fs_info->nodesize,
2204                                                 *level);
2205                                 err = -EIO;
2206                                 goto out;
2207                         }
2208                 }
2209
2210                 ret = check_child_node(cur, path->slots[*level], next);
2211                 if (ret) {
2212                         free_extent_buffer(next);
2213                         err = ret;
2214                         goto out;
2215                 }
2216
2217                 if (btrfs_is_leaf(next))
2218                         status = btrfs_check_leaf(root, NULL, next);
2219                 else
2220                         status = btrfs_check_node(root, NULL, next);
2221                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2222                         free_extent_buffer(next);
2223                         err = -EIO;
2224                         goto out;
2225                 }
2226
2227                 *level = *level - 1;
2228                 free_extent_buffer(path->nodes[*level]);
2229                 path->nodes[*level] = next;
2230                 path->slots[*level] = 0;
2231         }
2232 out:
2233         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2234         return err;
2235 }
2236
2237 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2238                             unsigned int ext_ref);
2239
2240 /*
2241  * Returns >0  Found error, should continue
2242  * Returns <0  Fatal error, must exit the whole check
2243  * Returns 0   No errors found
2244  */
2245 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2246                              int *level, struct node_refs *nrefs, int ext_ref)
2247 {
2248         enum btrfs_tree_block_status status;
2249         u64 bytenr;
2250         u64 ptr_gen;
2251         struct btrfs_fs_info *fs_info = root->fs_info;
2252         struct extent_buffer *next;
2253         struct extent_buffer *cur;
2254         int ret;
2255
2256         WARN_ON(*level < 0);
2257         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2258
2259         ret = update_nodes_refs(root, path->nodes[*level]->start,
2260                                 nrefs, *level);
2261         if (ret < 0)
2262                 return ret;
2263
2264         while (*level >= 0) {
2265                 WARN_ON(*level < 0);
2266                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2267                 cur = path->nodes[*level];
2268
2269                 if (btrfs_header_level(cur) != *level)
2270                         WARN_ON(1);
2271
2272                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2273                         break;
2274                 /* Don't forgot to check leaf/node validation */
2275                 if (*level == 0) {
2276                         ret = btrfs_check_leaf(root, NULL, cur);
2277                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2278                                 ret = -EIO;
2279                                 break;
2280                         }
2281                         ret = process_one_leaf_v2(root, path, nrefs,
2282                                                   level, ext_ref);
2283                         break;
2284                 } else {
2285                         ret = btrfs_check_node(root, NULL, cur);
2286                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2287                                 ret = -EIO;
2288                                 break;
2289                         }
2290                 }
2291                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2292                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2293
2294                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2295                 if (ret)
2296                         break;
2297                 if (!nrefs->need_check[*level - 1]) {
2298                         path->slots[*level]++;
2299                         continue;
2300                 }
2301
2302                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2303                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2304                         free_extent_buffer(next);
2305                         reada_walk_down(root, cur, path->slots[*level]);
2306                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2307                         if (!extent_buffer_uptodate(next)) {
2308                                 struct btrfs_key node_key;
2309
2310                                 btrfs_node_key_to_cpu(path->nodes[*level],
2311                                                       &node_key,
2312                                                       path->slots[*level]);
2313                                 btrfs_add_corrupt_extent_record(fs_info,
2314                                                 &node_key,
2315                                                 path->nodes[*level]->start,
2316                                                 fs_info->nodesize,
2317                                                 *level);
2318                                 ret = -EIO;
2319                                 break;
2320                         }
2321                 }
2322
2323                 ret = check_child_node(cur, path->slots[*level], next);
2324                 if (ret < 0) 
2325                         break;
2326
2327                 if (btrfs_is_leaf(next))
2328                         status = btrfs_check_leaf(root, NULL, next);
2329                 else
2330                         status = btrfs_check_node(root, NULL, next);
2331                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2332                         free_extent_buffer(next);
2333                         ret = -EIO;
2334                         break;
2335                 }
2336
2337                 *level = *level - 1;
2338                 free_extent_buffer(path->nodes[*level]);
2339                 path->nodes[*level] = next;
2340                 path->slots[*level] = 0;
2341         }
2342         return ret;
2343 }
2344
2345 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2346                         struct walk_control *wc, int *level)
2347 {
2348         int i;
2349         struct extent_buffer *leaf;
2350
2351         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2352                 leaf = path->nodes[i];
2353                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2354                         path->slots[i]++;
2355                         *level = i;
2356                         return 0;
2357                 } else {
2358                         free_extent_buffer(path->nodes[*level]);
2359                         path->nodes[*level] = NULL;
2360                         BUG_ON(*level > wc->active_node);
2361                         if (*level == wc->active_node)
2362                                 leave_shared_node(root, wc, *level);
2363                         *level = i + 1;
2364                 }
2365         }
2366         return 1;
2367 }
2368
2369 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2370                            int *level)
2371 {
2372         int i;
2373         struct extent_buffer *leaf;
2374
2375         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2376                 leaf = path->nodes[i];
2377                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2378                         path->slots[i]++;
2379                         *level = i;
2380                         return 0;
2381                 } else {
2382                         free_extent_buffer(path->nodes[*level]);
2383                         path->nodes[*level] = NULL;
2384                         *level = i + 1;
2385                 }
2386         }
2387         return 1;
2388 }
2389
2390 static int check_root_dir(struct inode_record *rec)
2391 {
2392         struct inode_backref *backref;
2393         int ret = -1;
2394
2395         if (!rec->found_inode_item || rec->errors)
2396                 goto out;
2397         if (rec->nlink != 1 || rec->found_link != 0)
2398                 goto out;
2399         if (list_empty(&rec->backrefs))
2400                 goto out;
2401         backref = to_inode_backref(rec->backrefs.next);
2402         if (!backref->found_inode_ref)
2403                 goto out;
2404         if (backref->index != 0 || backref->namelen != 2 ||
2405             memcmp(backref->name, "..", 2))
2406                 goto out;
2407         if (backref->found_dir_index || backref->found_dir_item)
2408                 goto out;
2409         ret = 0;
2410 out:
2411         return ret;
2412 }
2413
2414 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2415                               struct btrfs_root *root, struct btrfs_path *path,
2416                               struct inode_record *rec)
2417 {
2418         struct btrfs_inode_item *ei;
2419         struct btrfs_key key;
2420         int ret;
2421
2422         key.objectid = rec->ino;
2423         key.type = BTRFS_INODE_ITEM_KEY;
2424         key.offset = (u64)-1;
2425
2426         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2427         if (ret < 0)
2428                 goto out;
2429         if (ret) {
2430                 if (!path->slots[0]) {
2431                         ret = -ENOENT;
2432                         goto out;
2433                 }
2434                 path->slots[0]--;
2435                 ret = 0;
2436         }
2437         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2438         if (key.objectid != rec->ino) {
2439                 ret = -ENOENT;
2440                 goto out;
2441         }
2442
2443         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2444                             struct btrfs_inode_item);
2445         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2446         btrfs_mark_buffer_dirty(path->nodes[0]);
2447         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2448         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2449                root->root_key.objectid);
2450 out:
2451         btrfs_release_path(path);
2452         return ret;
2453 }
2454
2455 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2456                                     struct btrfs_root *root,
2457                                     struct btrfs_path *path,
2458                                     struct inode_record *rec)
2459 {
2460         int ret;
2461
2462         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2463         btrfs_release_path(path);
2464         if (!ret)
2465                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2466         return ret;
2467 }
2468
2469 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2470                                struct btrfs_root *root,
2471                                struct btrfs_path *path,
2472                                struct inode_record *rec)
2473 {
2474         struct btrfs_inode_item *ei;
2475         struct btrfs_key key;
2476         int ret = 0;
2477
2478         key.objectid = rec->ino;
2479         key.type = BTRFS_INODE_ITEM_KEY;
2480         key.offset = 0;
2481
2482         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2483         if (ret) {
2484                 if (ret > 0)
2485                         ret = -ENOENT;
2486                 goto out;
2487         }
2488
2489         /* Since ret == 0, no need to check anything */
2490         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2491                             struct btrfs_inode_item);
2492         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2493         btrfs_mark_buffer_dirty(path->nodes[0]);
2494         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2495         printf("reset nbytes for ino %llu root %llu\n",
2496                rec->ino, root->root_key.objectid);
2497 out:
2498         btrfs_release_path(path);
2499         return ret;
2500 }
2501
2502 static int add_missing_dir_index(struct btrfs_root *root,
2503                                  struct cache_tree *inode_cache,
2504                                  struct inode_record *rec,
2505                                  struct inode_backref *backref)
2506 {
2507         struct btrfs_path path;
2508         struct btrfs_trans_handle *trans;
2509         struct btrfs_dir_item *dir_item;
2510         struct extent_buffer *leaf;
2511         struct btrfs_key key;
2512         struct btrfs_disk_key disk_key;
2513         struct inode_record *dir_rec;
2514         unsigned long name_ptr;
2515         u32 data_size = sizeof(*dir_item) + backref->namelen;
2516         int ret;
2517
2518         trans = btrfs_start_transaction(root, 1);
2519         if (IS_ERR(trans))
2520                 return PTR_ERR(trans);
2521
2522         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2523                 (unsigned long long)rec->ino);
2524
2525         btrfs_init_path(&path);
2526         key.objectid = backref->dir;
2527         key.type = BTRFS_DIR_INDEX_KEY;
2528         key.offset = backref->index;
2529         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2530         BUG_ON(ret);
2531
2532         leaf = path.nodes[0];
2533         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2534
2535         disk_key.objectid = cpu_to_le64(rec->ino);
2536         disk_key.type = BTRFS_INODE_ITEM_KEY;
2537         disk_key.offset = 0;
2538
2539         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2540         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2541         btrfs_set_dir_data_len(leaf, dir_item, 0);
2542         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2543         name_ptr = (unsigned long)(dir_item + 1);
2544         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2545         btrfs_mark_buffer_dirty(leaf);
2546         btrfs_release_path(&path);
2547         btrfs_commit_transaction(trans, root);
2548
2549         backref->found_dir_index = 1;
2550         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2551         BUG_ON(IS_ERR(dir_rec));
2552         if (!dir_rec)
2553                 return 0;
2554         dir_rec->found_size += backref->namelen;
2555         if (dir_rec->found_size == dir_rec->isize &&
2556             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2557                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2558         if (dir_rec->found_size != dir_rec->isize)
2559                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2560
2561         return 0;
2562 }
2563
2564 static int delete_dir_index(struct btrfs_root *root,
2565                             struct inode_backref *backref)
2566 {
2567         struct btrfs_trans_handle *trans;
2568         struct btrfs_dir_item *di;
2569         struct btrfs_path path;
2570         int ret = 0;
2571
2572         trans = btrfs_start_transaction(root, 1);
2573         if (IS_ERR(trans))
2574                 return PTR_ERR(trans);
2575
2576         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2577                 (unsigned long long)backref->dir,
2578                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2579                 (unsigned long long)root->objectid);
2580
2581         btrfs_init_path(&path);
2582         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2583                                     backref->name, backref->namelen,
2584                                     backref->index, -1);
2585         if (IS_ERR(di)) {
2586                 ret = PTR_ERR(di);
2587                 btrfs_release_path(&path);
2588                 btrfs_commit_transaction(trans, root);
2589                 if (ret == -ENOENT)
2590                         return 0;
2591                 return ret;
2592         }
2593
2594         if (!di)
2595                 ret = btrfs_del_item(trans, root, &path);
2596         else
2597                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2598         BUG_ON(ret);
2599         btrfs_release_path(&path);
2600         btrfs_commit_transaction(trans, root);
2601         return ret;
2602 }
2603
2604 static int create_inode_item(struct btrfs_root *root,
2605                              struct inode_record *rec,
2606                              int root_dir)
2607 {
2608         struct btrfs_trans_handle *trans;
2609         struct btrfs_inode_item inode_item;
2610         time_t now = time(NULL);
2611         int ret;
2612
2613         trans = btrfs_start_transaction(root, 1);
2614         if (IS_ERR(trans)) {
2615                 ret = PTR_ERR(trans);
2616                 return ret;
2617         }
2618
2619         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2620                 "be incomplete, please check permissions and content after "
2621                 "the fsck completes.\n", (unsigned long long)root->objectid,
2622                 (unsigned long long)rec->ino);
2623
2624         memset(&inode_item, 0, sizeof(inode_item));
2625         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2626         if (root_dir)
2627                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2628         else
2629                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2630         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2631         if (rec->found_dir_item) {
2632                 if (rec->found_file_extent)
2633                         fprintf(stderr, "root %llu inode %llu has both a dir "
2634                                 "item and extents, unsure if it is a dir or a "
2635                                 "regular file so setting it as a directory\n",
2636                                 (unsigned long long)root->objectid,
2637                                 (unsigned long long)rec->ino);
2638                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2639                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2640         } else if (!rec->found_dir_item) {
2641                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2642                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2643         }
2644         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2645         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2646         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2647         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2648         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2649         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2650         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2651         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2652
2653         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2654         BUG_ON(ret);
2655         btrfs_commit_transaction(trans, root);
2656         return 0;
2657 }
2658
2659 static int repair_inode_backrefs(struct btrfs_root *root,
2660                                  struct inode_record *rec,
2661                                  struct cache_tree *inode_cache,
2662                                  int delete)
2663 {
2664         struct inode_backref *tmp, *backref;
2665         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2666         int ret = 0;
2667         int repaired = 0;
2668
2669         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2670                 if (!delete && rec->ino == root_dirid) {
2671                         if (!rec->found_inode_item) {
2672                                 ret = create_inode_item(root, rec, 1);
2673                                 if (ret)
2674                                         break;
2675                                 repaired++;
2676                         }
2677                 }
2678
2679                 /* Index 0 for root dir's are special, don't mess with it */
2680                 if (rec->ino == root_dirid && backref->index == 0)
2681                         continue;
2682
2683                 if (delete &&
2684                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2685                      (backref->found_dir_index && backref->found_inode_ref &&
2686                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2687                         ret = delete_dir_index(root, backref);
2688                         if (ret)
2689                                 break;
2690                         repaired++;
2691                         list_del(&backref->list);
2692                         free(backref);
2693                         continue;
2694                 }
2695
2696                 if (!delete && !backref->found_dir_index &&
2697                     backref->found_dir_item && backref->found_inode_ref) {
2698                         ret = add_missing_dir_index(root, inode_cache, rec,
2699                                                     backref);
2700                         if (ret)
2701                                 break;
2702                         repaired++;
2703                         if (backref->found_dir_item &&
2704                             backref->found_dir_index) {
2705                                 if (!backref->errors &&
2706                                     backref->found_inode_ref) {
2707                                         list_del(&backref->list);
2708                                         free(backref);
2709                                         continue;
2710                                 }
2711                         }
2712                 }
2713
2714                 if (!delete && (!backref->found_dir_index &&
2715                                 !backref->found_dir_item &&
2716                                 backref->found_inode_ref)) {
2717                         struct btrfs_trans_handle *trans;
2718                         struct btrfs_key location;
2719
2720                         ret = check_dir_conflict(root, backref->name,
2721                                                  backref->namelen,
2722                                                  backref->dir,
2723                                                  backref->index);
2724                         if (ret) {
2725                                 /*
2726                                  * let nlink fixing routine to handle it,
2727                                  * which can do it better.
2728                                  */
2729                                 ret = 0;
2730                                 break;
2731                         }
2732                         location.objectid = rec->ino;
2733                         location.type = BTRFS_INODE_ITEM_KEY;
2734                         location.offset = 0;
2735
2736                         trans = btrfs_start_transaction(root, 1);
2737                         if (IS_ERR(trans)) {
2738                                 ret = PTR_ERR(trans);
2739                                 break;
2740                         }
2741                         fprintf(stderr, "adding missing dir index/item pair "
2742                                 "for inode %llu\n",
2743                                 (unsigned long long)rec->ino);
2744                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2745                                                     backref->namelen,
2746                                                     backref->dir, &location,
2747                                                     imode_to_type(rec->imode),
2748                                                     backref->index);
2749                         BUG_ON(ret);
2750                         btrfs_commit_transaction(trans, root);
2751                         repaired++;
2752                 }
2753
2754                 if (!delete && (backref->found_inode_ref &&
2755                                 backref->found_dir_index &&
2756                                 backref->found_dir_item &&
2757                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2758                                 !rec->found_inode_item)) {
2759                         ret = create_inode_item(root, rec, 0);
2760                         if (ret)
2761                                 break;
2762                         repaired++;
2763                 }
2764
2765         }
2766         return ret ? ret : repaired;
2767 }
2768
2769 /*
2770  * To determine the file type for nlink/inode_item repair
2771  *
2772  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2773  * Return -ENOENT if file type is not found.
2774  */
2775 static int find_file_type(struct inode_record *rec, u8 *type)
2776 {
2777         struct inode_backref *backref;
2778
2779         /* For inode item recovered case */
2780         if (rec->found_inode_item) {
2781                 *type = imode_to_type(rec->imode);
2782                 return 0;
2783         }
2784
2785         list_for_each_entry(backref, &rec->backrefs, list) {
2786                 if (backref->found_dir_index || backref->found_dir_item) {
2787                         *type = backref->filetype;
2788                         return 0;
2789                 }
2790         }
2791         return -ENOENT;
2792 }
2793
2794 /*
2795  * To determine the file name for nlink repair
2796  *
2797  * Return 0 if file name is found, set name and namelen.
2798  * Return -ENOENT if file name is not found.
2799  */
2800 static int find_file_name(struct inode_record *rec,
2801                           char *name, int *namelen)
2802 {
2803         struct inode_backref *backref;
2804
2805         list_for_each_entry(backref, &rec->backrefs, list) {
2806                 if (backref->found_dir_index || backref->found_dir_item ||
2807                     backref->found_inode_ref) {
2808                         memcpy(name, backref->name, backref->namelen);
2809                         *namelen = backref->namelen;
2810                         return 0;
2811                 }
2812         }
2813         return -ENOENT;
2814 }
2815
2816 /* Reset the nlink of the inode to the correct one */
2817 static int reset_nlink(struct btrfs_trans_handle *trans,
2818                        struct btrfs_root *root,
2819                        struct btrfs_path *path,
2820                        struct inode_record *rec)
2821 {
2822         struct inode_backref *backref;
2823         struct inode_backref *tmp;
2824         struct btrfs_key key;
2825         struct btrfs_inode_item *inode_item;
2826         int ret = 0;
2827
2828         /* We don't believe this either, reset it and iterate backref */
2829         rec->found_link = 0;
2830
2831         /* Remove all backref including the valid ones */
2832         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2833                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2834                                    backref->index, backref->name,
2835                                    backref->namelen, 0);
2836                 if (ret < 0)
2837                         goto out;
2838
2839                 /* remove invalid backref, so it won't be added back */
2840                 if (!(backref->found_dir_index &&
2841                       backref->found_dir_item &&
2842                       backref->found_inode_ref)) {
2843                         list_del(&backref->list);
2844                         free(backref);
2845                 } else {
2846                         rec->found_link++;
2847                 }
2848         }
2849
2850         /* Set nlink to 0 */
2851         key.objectid = rec->ino;
2852         key.type = BTRFS_INODE_ITEM_KEY;
2853         key.offset = 0;
2854         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2855         if (ret < 0)
2856                 goto out;
2857         if (ret > 0) {
2858                 ret = -ENOENT;
2859                 goto out;
2860         }
2861         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2862                                     struct btrfs_inode_item);
2863         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2864         btrfs_mark_buffer_dirty(path->nodes[0]);
2865         btrfs_release_path(path);
2866
2867         /*
2868          * Add back valid inode_ref/dir_item/dir_index,
2869          * add_link() will handle the nlink inc, so new nlink must be correct
2870          */
2871         list_for_each_entry(backref, &rec->backrefs, list) {
2872                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2873                                      backref->name, backref->namelen,
2874                                      backref->filetype, &backref->index, 1);
2875                 if (ret < 0)
2876                         goto out;
2877         }
2878 out:
2879         btrfs_release_path(path);
2880         return ret;
2881 }
2882
2883 static int get_highest_inode(struct btrfs_trans_handle *trans,
2884                                 struct btrfs_root *root,
2885                                 struct btrfs_path *path,
2886                                 u64 *highest_ino)
2887 {
2888         struct btrfs_key key, found_key;
2889         int ret;
2890
2891         btrfs_init_path(path);
2892         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2893         key.offset = -1;
2894         key.type = BTRFS_INODE_ITEM_KEY;
2895         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2896         if (ret == 1) {
2897                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2898                                 path->slots[0] - 1);
2899                 *highest_ino = found_key.objectid;
2900                 ret = 0;
2901         }
2902         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2903                 ret = -EOVERFLOW;
2904         btrfs_release_path(path);
2905         return ret;
2906 }
2907
2908 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2909                                struct btrfs_root *root,
2910                                struct btrfs_path *path,
2911                                struct inode_record *rec)
2912 {
2913         char *dir_name = "lost+found";
2914         char namebuf[BTRFS_NAME_LEN] = {0};
2915         u64 lost_found_ino;
2916         u32 mode = 0700;
2917         u8 type = 0;
2918         int namelen = 0;
2919         int name_recovered = 0;
2920         int type_recovered = 0;
2921         int ret = 0;
2922
2923         /*
2924          * Get file name and type first before these invalid inode ref
2925          * are deleted by remove_all_invalid_backref()
2926          */
2927         name_recovered = !find_file_name(rec, namebuf, &namelen);
2928         type_recovered = !find_file_type(rec, &type);
2929
2930         if (!name_recovered) {
2931                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2932                        rec->ino, rec->ino);
2933                 namelen = count_digits(rec->ino);
2934                 sprintf(namebuf, "%llu", rec->ino);
2935                 name_recovered = 1;
2936         }
2937         if (!type_recovered) {
2938                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2939                        rec->ino);
2940                 type = BTRFS_FT_REG_FILE;
2941                 type_recovered = 1;
2942         }
2943
2944         ret = reset_nlink(trans, root, path, rec);
2945         if (ret < 0) {
2946                 fprintf(stderr,
2947                         "Failed to reset nlink for inode %llu: %s\n",
2948                         rec->ino, strerror(-ret));
2949                 goto out;
2950         }
2951
2952         if (rec->found_link == 0) {
2953                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2954                 if (ret < 0)
2955                         goto out;
2956                 lost_found_ino++;
2957                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2958                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2959                                   mode);
2960                 if (ret < 0) {
2961                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2962                                 dir_name, strerror(-ret));
2963                         goto out;
2964                 }
2965                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2966                                      namebuf, namelen, type, NULL, 1);
2967                 /*
2968                  * Add ".INO" suffix several times to handle case where
2969                  * "FILENAME.INO" is already taken by another file.
2970                  */
2971                 while (ret == -EEXIST) {
2972                         /*
2973                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2974                          */
2975                         if (namelen + count_digits(rec->ino) + 1 >
2976                             BTRFS_NAME_LEN) {
2977                                 ret = -EFBIG;
2978                                 goto out;
2979                         }
2980                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2981                                  ".%llu", rec->ino);
2982                         namelen += count_digits(rec->ino) + 1;
2983                         ret = btrfs_add_link(trans, root, rec->ino,
2984                                              lost_found_ino, namebuf,
2985                                              namelen, type, NULL, 1);
2986                 }
2987                 if (ret < 0) {
2988                         fprintf(stderr,
2989                                 "Failed to link the inode %llu to %s dir: %s\n",
2990                                 rec->ino, dir_name, strerror(-ret));
2991                         goto out;
2992                 }
2993                 /*
2994                  * Just increase the found_link, don't actually add the
2995                  * backref. This will make things easier and this inode
2996                  * record will be freed after the repair is done.
2997                  * So fsck will not report problem about this inode.
2998                  */
2999                 rec->found_link++;
3000                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3001                        namelen, namebuf, dir_name);
3002         }
3003         printf("Fixed the nlink of inode %llu\n", rec->ino);
3004 out:
3005         /*
3006          * Clear the flag anyway, or we will loop forever for the same inode
3007          * as it will not be removed from the bad inode list and the dead loop
3008          * happens.
3009          */
3010         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3011         btrfs_release_path(path);
3012         return ret;
3013 }
3014
3015 /*
3016  * Check if there is any normal(reg or prealloc) file extent for given
3017  * ino.
3018  * This is used to determine the file type when neither its dir_index/item or
3019  * inode_item exists.
3020  *
3021  * This will *NOT* report error, if any error happens, just consider it does
3022  * not have any normal file extent.
3023  */
3024 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3025 {
3026         struct btrfs_path path;
3027         struct btrfs_key key;
3028         struct btrfs_key found_key;
3029         struct btrfs_file_extent_item *fi;
3030         u8 type;
3031         int ret = 0;
3032
3033         btrfs_init_path(&path);
3034         key.objectid = ino;
3035         key.type = BTRFS_EXTENT_DATA_KEY;
3036         key.offset = 0;
3037
3038         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3039         if (ret < 0) {
3040                 ret = 0;
3041                 goto out;
3042         }
3043         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3044                 ret = btrfs_next_leaf(root, &path);
3045                 if (ret) {
3046                         ret = 0;
3047                         goto out;
3048                 }
3049         }
3050         while (1) {
3051                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3052                                       path.slots[0]);
3053                 if (found_key.objectid != ino ||
3054                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3055                         break;
3056                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3057                                     struct btrfs_file_extent_item);
3058                 type = btrfs_file_extent_type(path.nodes[0], fi);
3059                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3060                         ret = 1;
3061                         goto out;
3062                 }
3063         }
3064 out:
3065         btrfs_release_path(&path);
3066         return ret;
3067 }
3068
3069 static u32 btrfs_type_to_imode(u8 type)
3070 {
3071         static u32 imode_by_btrfs_type[] = {
3072                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3073                 [BTRFS_FT_DIR]          = S_IFDIR,
3074                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3075                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3076                 [BTRFS_FT_FIFO]         = S_IFIFO,
3077                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3078                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3079         };
3080
3081         return imode_by_btrfs_type[(type)];
3082 }
3083
3084 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3085                                 struct btrfs_root *root,
3086                                 struct btrfs_path *path,
3087                                 struct inode_record *rec)
3088 {
3089         u8 filetype;
3090         u32 mode = 0700;
3091         int type_recovered = 0;
3092         int ret = 0;
3093
3094         printf("Trying to rebuild inode:%llu\n", rec->ino);
3095
3096         type_recovered = !find_file_type(rec, &filetype);
3097
3098         /*
3099          * Try to determine inode type if type not found.
3100          *
3101          * For found regular file extent, it must be FILE.
3102          * For found dir_item/index, it must be DIR.
3103          *
3104          * For undetermined one, use FILE as fallback.
3105          *
3106          * TODO:
3107          * 1. If found backref(inode_index/item is already handled) to it,
3108          *    it must be DIR.
3109          *    Need new inode-inode ref structure to allow search for that.
3110          */
3111         if (!type_recovered) {
3112                 if (rec->found_file_extent &&
3113                     find_normal_file_extent(root, rec->ino)) {
3114                         type_recovered = 1;
3115                         filetype = BTRFS_FT_REG_FILE;
3116                 } else if (rec->found_dir_item) {
3117                         type_recovered = 1;
3118                         filetype = BTRFS_FT_DIR;
3119                 } else if (!list_empty(&rec->orphan_extents)) {
3120                         type_recovered = 1;
3121                         filetype = BTRFS_FT_REG_FILE;
3122                 } else{
3123                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3124                                rec->ino);
3125                         type_recovered = 1;
3126                         filetype = BTRFS_FT_REG_FILE;
3127                 }
3128         }
3129
3130         ret = btrfs_new_inode(trans, root, rec->ino,
3131                               mode | btrfs_type_to_imode(filetype));
3132         if (ret < 0)
3133                 goto out;
3134
3135         /*
3136          * Here inode rebuild is done, we only rebuild the inode item,
3137          * don't repair the nlink(like move to lost+found).
3138          * That is the job of nlink repair.
3139          *
3140          * We just fill the record and return
3141          */
3142         rec->found_dir_item = 1;
3143         rec->imode = mode | btrfs_type_to_imode(filetype);
3144         rec->nlink = 0;
3145         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3146         /* Ensure the inode_nlinks repair function will be called */
3147         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3148 out:
3149         return ret;
3150 }
3151
3152 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3153                                       struct btrfs_root *root,
3154                                       struct btrfs_path *path,
3155                                       struct inode_record *rec)
3156 {
3157         struct orphan_data_extent *orphan;
3158         struct orphan_data_extent *tmp;
3159         int ret = 0;
3160
3161         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3162                 /*
3163                  * Check for conflicting file extents
3164                  *
3165                  * Here we don't know whether the extents is compressed or not,
3166                  * so we can only assume it not compressed nor data offset,
3167                  * and use its disk_len as extent length.
3168                  */
3169                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3170                                        orphan->offset, orphan->disk_len, 0);
3171                 btrfs_release_path(path);
3172                 if (ret < 0)
3173                         goto out;
3174                 if (!ret) {
3175                         fprintf(stderr,
3176                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3177                                 orphan->disk_bytenr, orphan->disk_len);
3178                         ret = btrfs_free_extent(trans,
3179                                         root->fs_info->extent_root,
3180                                         orphan->disk_bytenr, orphan->disk_len,
3181                                         0, root->objectid, orphan->objectid,
3182                                         orphan->offset);
3183                         if (ret < 0)
3184                                 goto out;
3185                 }
3186                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3187                                 orphan->offset, orphan->disk_bytenr,
3188                                 orphan->disk_len, orphan->disk_len);
3189                 if (ret < 0)
3190                         goto out;
3191
3192                 /* Update file size info */
3193                 rec->found_size += orphan->disk_len;
3194                 if (rec->found_size == rec->nbytes)
3195                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3196
3197                 /* Update the file extent hole info too */
3198                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3199                                            orphan->disk_len);
3200                 if (ret < 0)
3201                         goto out;
3202                 if (RB_EMPTY_ROOT(&rec->holes))
3203                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3204
3205                 list_del(&orphan->list);
3206                 free(orphan);
3207         }
3208         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3209 out:
3210         return ret;
3211 }
3212
3213 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3214                                         struct btrfs_root *root,
3215                                         struct btrfs_path *path,
3216                                         struct inode_record *rec)
3217 {
3218         struct rb_node *node;
3219         struct file_extent_hole *hole;
3220         int found = 0;
3221         int ret = 0;
3222
3223         node = rb_first(&rec->holes);
3224
3225         while (node) {
3226                 found = 1;
3227                 hole = rb_entry(node, struct file_extent_hole, node);
3228                 ret = btrfs_punch_hole(trans, root, rec->ino,
3229                                        hole->start, hole->len);
3230                 if (ret < 0)
3231                         goto out;
3232                 ret = del_file_extent_hole(&rec->holes, hole->start,
3233                                            hole->len);
3234                 if (ret < 0)
3235                         goto out;
3236                 if (RB_EMPTY_ROOT(&rec->holes))
3237                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3238                 node = rb_first(&rec->holes);
3239         }
3240         /* special case for a file losing all its file extent */
3241         if (!found) {
3242                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3243                                        round_up(rec->isize,
3244                                                 root->fs_info->sectorsize));
3245                 if (ret < 0)
3246                         goto out;
3247         }
3248         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3249                rec->ino, root->objectid);
3250 out:
3251         return ret;
3252 }
3253
3254 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3255 {
3256         struct btrfs_trans_handle *trans;
3257         struct btrfs_path path;
3258         int ret = 0;
3259
3260         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3261                              I_ERR_NO_ORPHAN_ITEM |
3262                              I_ERR_LINK_COUNT_WRONG |
3263                              I_ERR_NO_INODE_ITEM |
3264                              I_ERR_FILE_EXTENT_ORPHAN |
3265                              I_ERR_FILE_EXTENT_DISCOUNT|
3266                              I_ERR_FILE_NBYTES_WRONG)))
3267                 return rec->errors;
3268
3269         /*
3270          * For nlink repair, it may create a dir and add link, so
3271          * 2 for parent(256)'s dir_index and dir_item
3272          * 2 for lost+found dir's inode_item and inode_ref
3273          * 1 for the new inode_ref of the file
3274          * 2 for lost+found dir's dir_index and dir_item for the file
3275          */
3276         trans = btrfs_start_transaction(root, 7);
3277         if (IS_ERR(trans))
3278                 return PTR_ERR(trans);
3279
3280         btrfs_init_path(&path);
3281         if (rec->errors & I_ERR_NO_INODE_ITEM)
3282                 ret = repair_inode_no_item(trans, root, &path, rec);
3283         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3284                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3285         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3286                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3287         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3288                 ret = repair_inode_isize(trans, root, &path, rec);
3289         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3290                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3291         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3292                 ret = repair_inode_nlinks(trans, root, &path, rec);
3293         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3294                 ret = repair_inode_nbytes(trans, root, &path, rec);
3295         btrfs_commit_transaction(trans, root);
3296         btrfs_release_path(&path);
3297         return ret;
3298 }
3299
3300 static int check_inode_recs(struct btrfs_root *root,
3301                             struct cache_tree *inode_cache)
3302 {
3303         struct cache_extent *cache;
3304         struct ptr_node *node;
3305         struct inode_record *rec;
3306         struct inode_backref *backref;
3307         int stage = 0;
3308         int ret = 0;
3309         int err = 0;
3310         u64 error = 0;
3311         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3312
3313         if (btrfs_root_refs(&root->root_item) == 0) {
3314                 if (!cache_tree_empty(inode_cache))
3315                         fprintf(stderr, "warning line %d\n", __LINE__);
3316                 return 0;
3317         }
3318
3319         /*
3320          * We need to repair backrefs first because we could change some of the
3321          * errors in the inode recs.
3322          *
3323          * We also need to go through and delete invalid backrefs first and then
3324          * add the correct ones second.  We do this because we may get EEXIST
3325          * when adding back the correct index because we hadn't yet deleted the
3326          * invalid index.
3327          *
3328          * For example, if we were missing a dir index then the directories
3329          * isize would be wrong, so if we fixed the isize to what we thought it
3330          * would be and then fixed the backref we'd still have a invalid fs, so
3331          * we need to add back the dir index and then check to see if the isize
3332          * is still wrong.
3333          */
3334         while (stage < 3) {
3335                 stage++;
3336                 if (stage == 3 && !err)
3337                         break;
3338
3339                 cache = search_cache_extent(inode_cache, 0);
3340                 while (repair && cache) {
3341                         node = container_of(cache, struct ptr_node, cache);
3342                         rec = node->data;
3343                         cache = next_cache_extent(cache);
3344
3345                         /* Need to free everything up and rescan */
3346                         if (stage == 3) {
3347                                 remove_cache_extent(inode_cache, &node->cache);
3348                                 free(node);
3349                                 free_inode_rec(rec);
3350                                 continue;
3351                         }
3352
3353                         if (list_empty(&rec->backrefs))
3354                                 continue;
3355
3356                         ret = repair_inode_backrefs(root, rec, inode_cache,
3357                                                     stage == 1);
3358                         if (ret < 0) {
3359                                 err = ret;
3360                                 stage = 2;
3361                                 break;
3362                         } if (ret > 0) {
3363                                 err = -EAGAIN;
3364                         }
3365                 }
3366         }
3367         if (err)
3368                 return err;
3369
3370         rec = get_inode_rec(inode_cache, root_dirid, 0);
3371         BUG_ON(IS_ERR(rec));
3372         if (rec) {
3373                 ret = check_root_dir(rec);
3374                 if (ret) {
3375                         fprintf(stderr, "root %llu root dir %llu error\n",
3376                                 (unsigned long long)root->root_key.objectid,
3377                                 (unsigned long long)root_dirid);
3378                         print_inode_error(root, rec);
3379                         error++;
3380                 }
3381         } else {
3382                 if (repair) {
3383                         struct btrfs_trans_handle *trans;
3384
3385                         trans = btrfs_start_transaction(root, 1);
3386                         if (IS_ERR(trans)) {
3387                                 err = PTR_ERR(trans);
3388                                 return err;
3389                         }
3390
3391                         fprintf(stderr,
3392                                 "root %llu missing its root dir, recreating\n",
3393                                 (unsigned long long)root->objectid);
3394
3395                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3396                         BUG_ON(ret);
3397
3398                         btrfs_commit_transaction(trans, root);
3399                         return -EAGAIN;
3400                 }
3401
3402                 fprintf(stderr, "root %llu root dir %llu not found\n",
3403                         (unsigned long long)root->root_key.objectid,
3404                         (unsigned long long)root_dirid);
3405         }
3406
3407         while (1) {
3408                 cache = search_cache_extent(inode_cache, 0);
3409                 if (!cache)
3410                         break;
3411                 node = container_of(cache, struct ptr_node, cache);
3412                 rec = node->data;
3413                 remove_cache_extent(inode_cache, &node->cache);
3414                 free(node);
3415                 if (rec->ino == root_dirid ||
3416                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3417                         free_inode_rec(rec);
3418                         continue;
3419                 }
3420
3421                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3422                         ret = check_orphan_item(root, rec->ino);
3423                         if (ret == 0)
3424                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3425                         if (can_free_inode_rec(rec)) {
3426                                 free_inode_rec(rec);
3427                                 continue;
3428                         }
3429                 }
3430
3431                 if (!rec->found_inode_item)
3432                         rec->errors |= I_ERR_NO_INODE_ITEM;
3433                 if (rec->found_link != rec->nlink)
3434                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3435                 if (repair) {
3436                         ret = try_repair_inode(root, rec);
3437                         if (ret == 0 && can_free_inode_rec(rec)) {
3438                                 free_inode_rec(rec);
3439                                 continue;
3440                         }
3441                         ret = 0;
3442                 }
3443
3444                 if (!(repair && ret == 0))
3445                         error++;
3446                 print_inode_error(root, rec);
3447                 list_for_each_entry(backref, &rec->backrefs, list) {
3448                         if (!backref->found_dir_item)
3449                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3450                         if (!backref->found_dir_index)
3451                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3452                         if (!backref->found_inode_ref)
3453                                 backref->errors |= REF_ERR_NO_INODE_REF;
3454                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3455                                 " namelen %u name %s filetype %d errors %x",
3456                                 (unsigned long long)backref->dir,
3457                                 (unsigned long long)backref->index,
3458                                 backref->namelen, backref->name,
3459                                 backref->filetype, backref->errors);
3460                         print_ref_error(backref->errors);
3461                 }
3462                 free_inode_rec(rec);
3463         }
3464         return (error > 0) ? -1 : 0;
3465 }
3466
3467 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3468                                         u64 objectid)
3469 {
3470         struct cache_extent *cache;
3471         struct root_record *rec = NULL;
3472         int ret;
3473
3474         cache = lookup_cache_extent(root_cache, objectid, 1);
3475         if (cache) {
3476                 rec = container_of(cache, struct root_record, cache);
3477         } else {
3478                 rec = calloc(1, sizeof(*rec));
3479                 if (!rec)
3480                         return ERR_PTR(-ENOMEM);
3481                 rec->objectid = objectid;
3482                 INIT_LIST_HEAD(&rec->backrefs);
3483                 rec->cache.start = objectid;
3484                 rec->cache.size = 1;
3485
3486                 ret = insert_cache_extent(root_cache, &rec->cache);
3487                 if (ret)
3488                         return ERR_PTR(-EEXIST);
3489         }
3490         return rec;
3491 }
3492
3493 static struct root_backref *get_root_backref(struct root_record *rec,
3494                                              u64 ref_root, u64 dir, u64 index,
3495                                              const char *name, int namelen)
3496 {
3497         struct root_backref *backref;
3498
3499         list_for_each_entry(backref, &rec->backrefs, list) {
3500                 if (backref->ref_root != ref_root || backref->dir != dir ||
3501                     backref->namelen != namelen)
3502                         continue;
3503                 if (memcmp(name, backref->name, namelen))
3504                         continue;
3505                 return backref;
3506         }
3507
3508         backref = calloc(1, sizeof(*backref) + namelen + 1);
3509         if (!backref)
3510                 return NULL;
3511         backref->ref_root = ref_root;
3512         backref->dir = dir;
3513         backref->index = index;
3514         backref->namelen = namelen;
3515         memcpy(backref->name, name, namelen);
3516         backref->name[namelen] = '\0';
3517         list_add_tail(&backref->list, &rec->backrefs);
3518         return backref;
3519 }
3520
3521 static void free_root_record(struct cache_extent *cache)
3522 {
3523         struct root_record *rec;
3524         struct root_backref *backref;
3525
3526         rec = container_of(cache, struct root_record, cache);
3527         while (!list_empty(&rec->backrefs)) {
3528                 backref = to_root_backref(rec->backrefs.next);
3529                 list_del(&backref->list);
3530                 free(backref);
3531         }
3532
3533         free(rec);
3534 }
3535
3536 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3537
3538 static int add_root_backref(struct cache_tree *root_cache,
3539                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3540                             const char *name, int namelen,
3541                             int item_type, int errors)
3542 {
3543         struct root_record *rec;
3544         struct root_backref *backref;
3545
3546         rec = get_root_rec(root_cache, root_id);
3547         BUG_ON(IS_ERR(rec));
3548         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3549         BUG_ON(!backref);
3550
3551         backref->errors |= errors;
3552
3553         if (item_type != BTRFS_DIR_ITEM_KEY) {
3554                 if (backref->found_dir_index || backref->found_back_ref ||
3555                     backref->found_forward_ref) {
3556                         if (backref->index != index)
3557                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3558                 } else {
3559                         backref->index = index;
3560                 }
3561         }
3562
3563         if (item_type == BTRFS_DIR_ITEM_KEY) {
3564                 if (backref->found_forward_ref)
3565                         rec->found_ref++;
3566                 backref->found_dir_item = 1;
3567         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3568                 backref->found_dir_index = 1;
3569         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3570                 if (backref->found_forward_ref)
3571                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3572                 else if (backref->found_dir_item)
3573                         rec->found_ref++;
3574                 backref->found_forward_ref = 1;
3575         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3576                 if (backref->found_back_ref)
3577                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3578                 backref->found_back_ref = 1;
3579         } else {
3580                 BUG_ON(1);
3581         }
3582
3583         if (backref->found_forward_ref && backref->found_dir_item)
3584                 backref->reachable = 1;
3585         return 0;
3586 }
3587
3588 static int merge_root_recs(struct btrfs_root *root,
3589                            struct cache_tree *src_cache,
3590                            struct cache_tree *dst_cache)
3591 {
3592         struct cache_extent *cache;
3593         struct ptr_node *node;
3594         struct inode_record *rec;
3595         struct inode_backref *backref;
3596         int ret = 0;
3597
3598         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3599                 free_inode_recs_tree(src_cache);
3600                 return 0;
3601         }
3602
3603         while (1) {
3604                 cache = search_cache_extent(src_cache, 0);
3605                 if (!cache)
3606                         break;
3607                 node = container_of(cache, struct ptr_node, cache);
3608                 rec = node->data;
3609                 remove_cache_extent(src_cache, &node->cache);
3610                 free(node);
3611
3612                 ret = is_child_root(root, root->objectid, rec->ino);
3613                 if (ret < 0)
3614                         break;
3615                 else if (ret == 0)
3616                         goto skip;
3617
3618                 list_for_each_entry(backref, &rec->backrefs, list) {
3619                         BUG_ON(backref->found_inode_ref);
3620                         if (backref->found_dir_item)
3621                                 add_root_backref(dst_cache, rec->ino,
3622                                         root->root_key.objectid, backref->dir,
3623                                         backref->index, backref->name,
3624                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3625                                         backref->errors);
3626                         if (backref->found_dir_index)
3627                                 add_root_backref(dst_cache, rec->ino,
3628                                         root->root_key.objectid, backref->dir,
3629                                         backref->index, backref->name,
3630                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3631                                         backref->errors);
3632                 }
3633 skip:
3634                 free_inode_rec(rec);
3635         }
3636         if (ret < 0)
3637                 return ret;
3638         return 0;
3639 }
3640
3641 static int check_root_refs(struct btrfs_root *root,
3642                            struct cache_tree *root_cache)
3643 {
3644         struct root_record *rec;
3645         struct root_record *ref_root;
3646         struct root_backref *backref;
3647         struct cache_extent *cache;
3648         int loop = 1;
3649         int ret;
3650         int error;
3651         int errors = 0;
3652
3653         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3654         BUG_ON(IS_ERR(rec));
3655         rec->found_ref = 1;
3656
3657         /* fixme: this can not detect circular references */
3658         while (loop) {
3659                 loop = 0;
3660                 cache = search_cache_extent(root_cache, 0);
3661                 while (1) {
3662                         if (!cache)
3663                                 break;
3664                         rec = container_of(cache, struct root_record, cache);
3665                         cache = next_cache_extent(cache);
3666
3667                         if (rec->found_ref == 0)
3668                                 continue;
3669
3670                         list_for_each_entry(backref, &rec->backrefs, list) {
3671                                 if (!backref->reachable)
3672                                         continue;
3673
3674                                 ref_root = get_root_rec(root_cache,
3675                                                         backref->ref_root);
3676                                 BUG_ON(IS_ERR(ref_root));
3677                                 if (ref_root->found_ref > 0)
3678                                         continue;
3679
3680                                 backref->reachable = 0;
3681                                 rec->found_ref--;
3682                                 if (rec->found_ref == 0)
3683                                         loop = 1;
3684                         }
3685                 }
3686         }
3687
3688         cache = search_cache_extent(root_cache, 0);
3689         while (1) {
3690                 if (!cache)
3691                         break;
3692                 rec = container_of(cache, struct root_record, cache);
3693                 cache = next_cache_extent(cache);
3694
3695                 if (rec->found_ref == 0 &&
3696                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3697                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3698                         ret = check_orphan_item(root->fs_info->tree_root,
3699                                                 rec->objectid);
3700                         if (ret == 0)
3701                                 continue;
3702
3703                         /*
3704                          * If we don't have a root item then we likely just have
3705                          * a dir item in a snapshot for this root but no actual
3706                          * ref key or anything so it's meaningless.
3707                          */
3708                         if (!rec->found_root_item)
3709                                 continue;
3710                         errors++;
3711                         fprintf(stderr, "fs tree %llu not referenced\n",
3712                                 (unsigned long long)rec->objectid);
3713                 }
3714
3715                 error = 0;
3716                 if (rec->found_ref > 0 && !rec->found_root_item)
3717                         error = 1;
3718                 list_for_each_entry(backref, &rec->backrefs, list) {
3719                         if (!backref->found_dir_item)
3720                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3721                         if (!backref->found_dir_index)
3722                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3723                         if (!backref->found_back_ref)
3724                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3725                         if (!backref->found_forward_ref)
3726                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3727                         if (backref->reachable && backref->errors)
3728                                 error = 1;
3729                 }
3730                 if (!error)
3731                         continue;
3732
3733                 errors++;
3734                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3735                         (unsigned long long)rec->objectid, rec->found_ref,
3736                          rec->found_root_item ? "" : "not found");
3737
3738                 list_for_each_entry(backref, &rec->backrefs, list) {
3739                         if (!backref->reachable)
3740                                 continue;
3741                         if (!backref->errors && rec->found_root_item)
3742                                 continue;
3743                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3744                                 " index %llu namelen %u name %s errors %x\n",
3745                                 (unsigned long long)backref->ref_root,
3746                                 (unsigned long long)backref->dir,
3747                                 (unsigned long long)backref->index,
3748                                 backref->namelen, backref->name,
3749                                 backref->errors);
3750                         print_ref_error(backref->errors);
3751                 }
3752         }
3753         return errors > 0 ? 1 : 0;
3754 }
3755
3756 static int process_root_ref(struct extent_buffer *eb, int slot,
3757                             struct btrfs_key *key,
3758                             struct cache_tree *root_cache)
3759 {
3760         u64 dirid;
3761         u64 index;
3762         u32 len;
3763         u32 name_len;
3764         struct btrfs_root_ref *ref;
3765         char namebuf[BTRFS_NAME_LEN];
3766         int error;
3767
3768         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3769
3770         dirid = btrfs_root_ref_dirid(eb, ref);
3771         index = btrfs_root_ref_sequence(eb, ref);
3772         name_len = btrfs_root_ref_name_len(eb, ref);
3773
3774         if (name_len <= BTRFS_NAME_LEN) {
3775                 len = name_len;
3776                 error = 0;
3777         } else {
3778                 len = BTRFS_NAME_LEN;
3779                 error = REF_ERR_NAME_TOO_LONG;
3780         }
3781         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3782
3783         if (key->type == BTRFS_ROOT_REF_KEY) {
3784                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3785                                  index, namebuf, len, key->type, error);
3786         } else {
3787                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3788                                  index, namebuf, len, key->type, error);
3789         }
3790         return 0;
3791 }
3792
3793 static void free_corrupt_block(struct cache_extent *cache)
3794 {
3795         struct btrfs_corrupt_block *corrupt;
3796
3797         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3798         free(corrupt);
3799 }
3800
3801 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3802
3803 /*
3804  * Repair the btree of the given root.
3805  *
3806  * The fix is to remove the node key in corrupt_blocks cache_tree.
3807  * and rebalance the tree.
3808  * After the fix, the btree should be writeable.
3809  */
3810 static int repair_btree(struct btrfs_root *root,
3811                         struct cache_tree *corrupt_blocks)
3812 {
3813         struct btrfs_trans_handle *trans;
3814         struct btrfs_path path;
3815         struct btrfs_corrupt_block *corrupt;
3816         struct cache_extent *cache;
3817         struct btrfs_key key;
3818         u64 offset;
3819         int level;
3820         int ret = 0;
3821
3822         if (cache_tree_empty(corrupt_blocks))
3823                 return 0;
3824
3825         trans = btrfs_start_transaction(root, 1);
3826         if (IS_ERR(trans)) {
3827                 ret = PTR_ERR(trans);
3828                 fprintf(stderr, "Error starting transaction: %s\n",
3829                         strerror(-ret));
3830                 return ret;
3831         }
3832         btrfs_init_path(&path);
3833         cache = first_cache_extent(corrupt_blocks);
3834         while (cache) {
3835                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3836                                        cache);
3837                 level = corrupt->level;
3838                 path.lowest_level = level;
3839                 key.objectid = corrupt->key.objectid;
3840                 key.type = corrupt->key.type;
3841                 key.offset = corrupt->key.offset;
3842
3843                 /*
3844                  * Here we don't want to do any tree balance, since it may
3845                  * cause a balance with corrupted brother leaf/node,
3846                  * so ins_len set to 0 here.
3847                  * Balance will be done after all corrupt node/leaf is deleted.
3848                  */
3849                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3850                 if (ret < 0)
3851                         goto out;
3852                 offset = btrfs_node_blockptr(path.nodes[level],
3853                                              path.slots[level]);
3854
3855                 /* Remove the ptr */
3856                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3857                 if (ret < 0)
3858                         goto out;
3859                 /*
3860                  * Remove the corresponding extent
3861                  * return value is not concerned.
3862                  */
3863                 btrfs_release_path(&path);
3864                 ret = btrfs_free_extent(trans, root, offset,
3865                                 root->fs_info->nodesize, 0,
3866                                 root->root_key.objectid, level - 1, 0);
3867                 cache = next_cache_extent(cache);
3868         }
3869
3870         /* Balance the btree using btrfs_search_slot() */
3871         cache = first_cache_extent(corrupt_blocks);
3872         while (cache) {
3873                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3874                                        cache);
3875                 memcpy(&key, &corrupt->key, sizeof(key));
3876                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3877                 if (ret < 0)
3878                         goto out;
3879                 /* return will always >0 since it won't find the item */
3880                 ret = 0;
3881                 btrfs_release_path(&path);
3882                 cache = next_cache_extent(cache);
3883         }
3884 out:
3885         btrfs_commit_transaction(trans, root);
3886         btrfs_release_path(&path);
3887         return ret;
3888 }
3889
3890 static int check_fs_root(struct btrfs_root *root,
3891                          struct cache_tree *root_cache,
3892                          struct walk_control *wc)
3893 {
3894         int ret = 0;
3895         int err = 0;
3896         int wret;
3897         int level;
3898         struct btrfs_path path;
3899         struct shared_node root_node;
3900         struct root_record *rec;
3901         struct btrfs_root_item *root_item = &root->root_item;
3902         struct cache_tree corrupt_blocks;
3903         struct orphan_data_extent *orphan;
3904         struct orphan_data_extent *tmp;
3905         enum btrfs_tree_block_status status;
3906         struct node_refs nrefs;
3907
3908         /*
3909          * Reuse the corrupt_block cache tree to record corrupted tree block
3910          *
3911          * Unlike the usage in extent tree check, here we do it in a per
3912          * fs/subvol tree base.
3913          */
3914         cache_tree_init(&corrupt_blocks);
3915         root->fs_info->corrupt_blocks = &corrupt_blocks;
3916
3917         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3918                 rec = get_root_rec(root_cache, root->root_key.objectid);
3919                 BUG_ON(IS_ERR(rec));
3920                 if (btrfs_root_refs(root_item) > 0)
3921                         rec->found_root_item = 1;
3922         }
3923
3924         btrfs_init_path(&path);
3925         memset(&root_node, 0, sizeof(root_node));
3926         cache_tree_init(&root_node.root_cache);
3927         cache_tree_init(&root_node.inode_cache);
3928         memset(&nrefs, 0, sizeof(nrefs));
3929
3930         /* Move the orphan extent record to corresponding inode_record */
3931         list_for_each_entry_safe(orphan, tmp,
3932                                  &root->orphan_data_extents, list) {
3933                 struct inode_record *inode;
3934
3935                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3936                                       1);
3937                 BUG_ON(IS_ERR(inode));
3938                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3939                 list_move(&orphan->list, &inode->orphan_extents);
3940         }
3941
3942         level = btrfs_header_level(root->node);
3943         memset(wc->nodes, 0, sizeof(wc->nodes));
3944         wc->nodes[level] = &root_node;
3945         wc->active_node = level;
3946         wc->root_level = level;
3947
3948         /* We may not have checked the root block, lets do that now */
3949         if (btrfs_is_leaf(root->node))
3950                 status = btrfs_check_leaf(root, NULL, root->node);
3951         else
3952                 status = btrfs_check_node(root, NULL, root->node);
3953         if (status != BTRFS_TREE_BLOCK_CLEAN)
3954                 return -EIO;
3955
3956         if (btrfs_root_refs(root_item) > 0 ||
3957             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3958                 path.nodes[level] = root->node;
3959                 extent_buffer_get(root->node);
3960                 path.slots[level] = 0;
3961         } else {
3962                 struct btrfs_key key;
3963                 struct btrfs_disk_key found_key;
3964
3965                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3966                 level = root_item->drop_level;
3967                 path.lowest_level = level;
3968                 if (level > btrfs_header_level(root->node) ||
3969                     level >= BTRFS_MAX_LEVEL) {
3970                         error("ignoring invalid drop level: %u", level);
3971                         goto skip_walking;
3972                 }
3973                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3974                 if (wret < 0)
3975                         goto skip_walking;
3976                 btrfs_node_key(path.nodes[level], &found_key,
3977                                 path.slots[level]);
3978                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3979                                         sizeof(found_key)));
3980         }
3981
3982         while (1) {
3983                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3984                 if (wret < 0)
3985                         ret = wret;
3986                 if (wret != 0)
3987                         break;
3988
3989                 wret = walk_up_tree(root, &path, wc, &level);
3990                 if (wret < 0)
3991                         ret = wret;
3992                 if (wret != 0)
3993                         break;
3994         }
3995 skip_walking:
3996         btrfs_release_path(&path);
3997
3998         if (!cache_tree_empty(&corrupt_blocks)) {
3999                 struct cache_extent *cache;
4000                 struct btrfs_corrupt_block *corrupt;
4001
4002                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4003                        root->root_key.objectid);
4004                 cache = first_cache_extent(&corrupt_blocks);
4005                 while (cache) {
4006                         corrupt = container_of(cache,
4007                                                struct btrfs_corrupt_block,
4008                                                cache);
4009                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4010                                cache->start, corrupt->level,
4011                                corrupt->key.objectid, corrupt->key.type,
4012                                corrupt->key.offset);
4013                         cache = next_cache_extent(cache);
4014                 }
4015                 if (repair) {
4016                         printf("Try to repair the btree for root %llu\n",
4017                                root->root_key.objectid);
4018                         ret = repair_btree(root, &corrupt_blocks);
4019                         if (ret < 0)
4020                                 fprintf(stderr, "Failed to repair btree: %s\n",
4021                                         strerror(-ret));
4022                         if (!ret)
4023                                 printf("Btree for root %llu is fixed\n",
4024                                        root->root_key.objectid);
4025                 }
4026         }
4027
4028         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4029         if (err < 0)
4030                 ret = err;
4031
4032         if (root_node.current) {
4033                 root_node.current->checked = 1;
4034                 maybe_free_inode_rec(&root_node.inode_cache,
4035                                 root_node.current);
4036         }
4037
4038         err = check_inode_recs(root, &root_node.inode_cache);
4039         if (!ret)
4040                 ret = err;
4041
4042         free_corrupt_blocks_tree(&corrupt_blocks);
4043         root->fs_info->corrupt_blocks = NULL;
4044         free_orphan_data_extents(&root->orphan_data_extents);
4045         return ret;
4046 }
4047
4048 static int fs_root_objectid(u64 objectid)
4049 {
4050         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4051             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4052                 return 1;
4053         return is_fstree(objectid);
4054 }
4055
4056 static int check_fs_roots(struct btrfs_root *root,
4057                           struct cache_tree *root_cache)
4058 {
4059         struct btrfs_path path;
4060         struct btrfs_key key;
4061         struct walk_control wc;
4062         struct extent_buffer *leaf, *tree_node;
4063         struct btrfs_root *tmp_root;
4064         struct btrfs_root *tree_root = root->fs_info->tree_root;
4065         int ret;
4066         int err = 0;
4067
4068         if (ctx.progress_enabled) {
4069                 ctx.tp = TASK_FS_ROOTS;
4070                 task_start(ctx.info);
4071         }
4072
4073         /*
4074          * Just in case we made any changes to the extent tree that weren't
4075          * reflected into the free space cache yet.
4076          */
4077         if (repair)
4078                 reset_cached_block_groups(root->fs_info);
4079         memset(&wc, 0, sizeof(wc));
4080         cache_tree_init(&wc.shared);
4081         btrfs_init_path(&path);
4082
4083 again:
4084         key.offset = 0;
4085         key.objectid = 0;
4086         key.type = BTRFS_ROOT_ITEM_KEY;
4087         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4088         if (ret < 0) {
4089                 err = 1;
4090                 goto out;
4091         }
4092         tree_node = tree_root->node;
4093         while (1) {
4094                 if (tree_node != tree_root->node) {
4095                         free_root_recs_tree(root_cache);
4096                         btrfs_release_path(&path);
4097                         goto again;
4098                 }
4099                 leaf = path.nodes[0];
4100                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4101                         ret = btrfs_next_leaf(tree_root, &path);
4102                         if (ret) {
4103                                 if (ret < 0)
4104                                         err = 1;
4105                                 break;
4106                         }
4107                         leaf = path.nodes[0];
4108                 }
4109                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4110                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4111                     fs_root_objectid(key.objectid)) {
4112                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4113                                 tmp_root = btrfs_read_fs_root_no_cache(
4114                                                 root->fs_info, &key);
4115                         } else {
4116                                 key.offset = (u64)-1;
4117                                 tmp_root = btrfs_read_fs_root(
4118                                                 root->fs_info, &key);
4119                         }
4120                         if (IS_ERR(tmp_root)) {
4121                                 err = 1;
4122                                 goto next;
4123                         }
4124                         ret = check_fs_root(tmp_root, root_cache, &wc);
4125                         if (ret == -EAGAIN) {
4126                                 free_root_recs_tree(root_cache);
4127                                 btrfs_release_path(&path);
4128                                 goto again;
4129                         }
4130                         if (ret)
4131                                 err = 1;
4132                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4133                                 btrfs_free_fs_root(tmp_root);
4134                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4135                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4136                         process_root_ref(leaf, path.slots[0], &key,
4137                                          root_cache);
4138                 }
4139 next:
4140                 path.slots[0]++;
4141         }
4142 out:
4143         btrfs_release_path(&path);
4144         if (err)
4145                 free_extent_cache_tree(&wc.shared);
4146         if (!cache_tree_empty(&wc.shared))
4147                 fprintf(stderr, "warning line %d\n", __LINE__);
4148
4149         task_stop(ctx.info);
4150
4151         return err;
4152 }
4153
4154 /*
4155  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4156  * INODE_REF/INODE_EXTREF match.
4157  *
4158  * @root:       the root of the fs/file tree
4159  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4160  * @key:        the key of the DIR_ITEM/DIR_INDEX
4161  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4162  *              distinguish root_dir between normal dir/file
4163  * @name:       the name in the INODE_REF/INODE_EXTREF
4164  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4165  * @mode:       the st_mode of INODE_ITEM
4166  *
4167  * Return 0 if no error occurred.
4168  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4169  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4170  * dir/file.
4171  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4172  * not match for normal dir/file.
4173  */
4174 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4175                          struct btrfs_key *key, u64 index, char *name,
4176                          u32 namelen, u32 mode)
4177 {
4178         struct btrfs_path path;
4179         struct extent_buffer *node;
4180         struct btrfs_dir_item *di;
4181         struct btrfs_key location;
4182         char namebuf[BTRFS_NAME_LEN] = {0};
4183         u32 total;
4184         u32 cur = 0;
4185         u32 len;
4186         u32 name_len;
4187         u32 data_len;
4188         u8 filetype;
4189         int slot;
4190         int ret;
4191
4192         btrfs_init_path(&path);
4193         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4194         if (ret < 0) {
4195                 ret = DIR_ITEM_MISSING;
4196                 goto out;
4197         }
4198
4199         /* Process root dir and goto out*/
4200         if (index == 0) {
4201                 if (ret == 0) {
4202                         ret = ROOT_DIR_ERROR;
4203                         error(
4204                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4205                                 root->objectid,
4206                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4207                                         "REF" : "EXTREF",
4208                                 ref_key->objectid, ref_key->offset,
4209                                 key->type == BTRFS_DIR_ITEM_KEY ?
4210                                         "DIR_ITEM" : "DIR_INDEX");
4211                 } else {
4212                         ret = 0;
4213                 }
4214
4215                 goto out;
4216         }
4217
4218         /* Process normal file/dir */
4219         if (ret > 0) {
4220                 ret = DIR_ITEM_MISSING;
4221                 error(
4222                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4223                         root->objectid,
4224                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4225                         ref_key->objectid, ref_key->offset,
4226                         key->type == BTRFS_DIR_ITEM_KEY ?
4227                                 "DIR_ITEM" : "DIR_INDEX",
4228                         key->objectid, key->offset, namelen, name,
4229                         imode_to_type(mode));
4230                 goto out;
4231         }
4232
4233         /* Check whether inode_id/filetype/name match */
4234         node = path.nodes[0];
4235         slot = path.slots[0];
4236         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4237         total = btrfs_item_size_nr(node, slot);
4238         while (cur < total) {
4239                 ret = DIR_ITEM_MISMATCH;
4240                 name_len = btrfs_dir_name_len(node, di);
4241                 data_len = btrfs_dir_data_len(node, di);
4242
4243                 btrfs_dir_item_key_to_cpu(node, di, &location);
4244                 if (location.objectid != ref_key->objectid ||
4245                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4246                     location.offset != 0)
4247                         goto next;
4248
4249                 filetype = btrfs_dir_type(node, di);
4250                 if (imode_to_type(mode) != filetype)
4251                         goto next;
4252
4253                 if (cur + sizeof(*di) + name_len > total ||
4254                     name_len > BTRFS_NAME_LEN) {
4255                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4256                                 root->objectid,
4257                                 key->type == BTRFS_DIR_ITEM_KEY ?
4258                                 "DIR_ITEM" : "DIR_INDEX",
4259                                 key->objectid, key->offset, name_len);
4260
4261                         if (cur + sizeof(*di) > total)
4262                                 break;
4263                         len = min_t(u32, total - cur - sizeof(*di),
4264                                     BTRFS_NAME_LEN);
4265                 } else {
4266                         len = name_len;
4267                 }
4268
4269                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4270                 if (len != namelen || strncmp(namebuf, name, len))
4271                         goto next;
4272
4273                 ret = 0;
4274                 goto out;
4275 next:
4276                 len = sizeof(*di) + name_len + data_len;
4277                 di = (struct btrfs_dir_item *)((char *)di + len);
4278                 cur += len;
4279         }
4280         if (ret == DIR_ITEM_MISMATCH)
4281                 error(
4282                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4283                         root->objectid,
4284                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4285                         ref_key->objectid, ref_key->offset,
4286                         key->type == BTRFS_DIR_ITEM_KEY ?
4287                                 "DIR_ITEM" : "DIR_INDEX",
4288                         key->objectid, key->offset, namelen, name,
4289                         imode_to_type(mode));
4290 out:
4291         btrfs_release_path(&path);
4292         return ret;
4293 }
4294
4295 /*
4296  * Traverse the given INODE_REF and call find_dir_item() to find related
4297  * DIR_ITEM/DIR_INDEX.
4298  *
4299  * @root:       the root of the fs/file tree
4300  * @ref_key:    the key of the INODE_REF
4301  * @refs:       the count of INODE_REF
4302  * @mode:       the st_mode of INODE_ITEM
4303  *
4304  * Return 0 if no error occurred.
4305  */
4306 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4307                            struct extent_buffer *node, int slot, u64 *refs,
4308                            int mode)
4309 {
4310         struct btrfs_key key;
4311         struct btrfs_inode_ref *ref;
4312         char namebuf[BTRFS_NAME_LEN] = {0};
4313         u32 total;
4314         u32 cur = 0;
4315         u32 len;
4316         u32 name_len;
4317         u64 index;
4318         int ret, err = 0;
4319
4320         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4321         total = btrfs_item_size_nr(node, slot);
4322
4323 next:
4324         /* Update inode ref count */
4325         (*refs)++;
4326
4327         index = btrfs_inode_ref_index(node, ref);
4328         name_len = btrfs_inode_ref_name_len(node, ref);
4329         if (cur + sizeof(*ref) + name_len > total ||
4330             name_len > BTRFS_NAME_LEN) {
4331                 warning("root %llu INODE_REF[%llu %llu] name too long",
4332                         root->objectid, ref_key->objectid, ref_key->offset);
4333
4334                 if (total < cur + sizeof(*ref))
4335                         goto out;
4336                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4337         } else {
4338                 len = name_len;
4339         }
4340
4341         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4342
4343         /* Check root dir ref name */
4344         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4345                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4346                       root->objectid, ref_key->objectid, ref_key->offset,
4347                       namebuf);
4348                 err |= ROOT_DIR_ERROR;
4349         }
4350
4351         /* Find related DIR_INDEX */
4352         key.objectid = ref_key->offset;
4353         key.type = BTRFS_DIR_INDEX_KEY;
4354         key.offset = index;
4355         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4356         err |= ret;
4357
4358         /* Find related dir_item */
4359         key.objectid = ref_key->offset;
4360         key.type = BTRFS_DIR_ITEM_KEY;
4361         key.offset = btrfs_name_hash(namebuf, len);
4362         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4363         err |= ret;
4364
4365         len = sizeof(*ref) + name_len;
4366         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4367         cur += len;
4368         if (cur < total)
4369                 goto next;
4370
4371 out:
4372         return err;
4373 }
4374
4375 /*
4376  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4377  * DIR_ITEM/DIR_INDEX.
4378  *
4379  * @root:       the root of the fs/file tree
4380  * @ref_key:    the key of the INODE_EXTREF
4381  * @refs:       the count of INODE_EXTREF
4382  * @mode:       the st_mode of INODE_ITEM
4383  *
4384  * Return 0 if no error occurred.
4385  */
4386 static int check_inode_extref(struct btrfs_root *root,
4387                               struct btrfs_key *ref_key,
4388                               struct extent_buffer *node, int slot, u64 *refs,
4389                               int mode)
4390 {
4391         struct btrfs_key key;
4392         struct btrfs_inode_extref *extref;
4393         char namebuf[BTRFS_NAME_LEN] = {0};
4394         u32 total;
4395         u32 cur = 0;
4396         u32 len;
4397         u32 name_len;
4398         u64 index;
4399         u64 parent;
4400         int ret;
4401         int err = 0;
4402
4403         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4404         total = btrfs_item_size_nr(node, slot);
4405
4406 next:
4407         /* update inode ref count */
4408         (*refs)++;
4409         name_len = btrfs_inode_extref_name_len(node, extref);
4410         index = btrfs_inode_extref_index(node, extref);
4411         parent = btrfs_inode_extref_parent(node, extref);
4412         if (name_len <= BTRFS_NAME_LEN) {
4413                 len = name_len;
4414         } else {
4415                 len = BTRFS_NAME_LEN;
4416                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4417                         root->objectid, ref_key->objectid, ref_key->offset);
4418         }
4419         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4420
4421         /* Check root dir ref name */
4422         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4423                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4424                       root->objectid, ref_key->objectid, ref_key->offset,
4425                       namebuf);
4426                 err |= ROOT_DIR_ERROR;
4427         }
4428
4429         /* find related dir_index */
4430         key.objectid = parent;
4431         key.type = BTRFS_DIR_INDEX_KEY;
4432         key.offset = index;
4433         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4434         err |= ret;
4435
4436         /* find related dir_item */
4437         key.objectid = parent;
4438         key.type = BTRFS_DIR_ITEM_KEY;
4439         key.offset = btrfs_name_hash(namebuf, len);
4440         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4441         err |= ret;
4442
4443         len = sizeof(*extref) + name_len;
4444         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4445         cur += len;
4446
4447         if (cur < total)
4448                 goto next;
4449
4450         return err;
4451 }
4452
4453 /*
4454  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4455  * DIR_ITEM/DIR_INDEX match.
4456  *
4457  * @root:       the root of the fs/file tree
4458  * @key:        the key of the INODE_REF/INODE_EXTREF
4459  * @name:       the name in the INODE_REF/INODE_EXTREF
4460  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4461  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4462  * to (u64)-1
4463  * @ext_ref:    the EXTENDED_IREF feature
4464  *
4465  * Return 0 if no error occurred.
4466  * Return >0 for error bitmap
4467  */
4468 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4469                           char *name, int namelen, u64 index,
4470                           unsigned int ext_ref)
4471 {
4472         struct btrfs_path path;
4473         struct btrfs_inode_ref *ref;
4474         struct btrfs_inode_extref *extref;
4475         struct extent_buffer *node;
4476         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4477         u32 total;
4478         u32 cur = 0;
4479         u32 len;
4480         u32 ref_namelen;
4481         u64 ref_index;
4482         u64 parent;
4483         u64 dir_id;
4484         int slot;
4485         int ret;
4486
4487         btrfs_init_path(&path);
4488         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4489         if (ret) {
4490                 ret = INODE_REF_MISSING;
4491                 goto extref;
4492         }
4493
4494         node = path.nodes[0];
4495         slot = path.slots[0];
4496
4497         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4498         total = btrfs_item_size_nr(node, slot);
4499
4500         /* Iterate all entry of INODE_REF */
4501         while (cur < total) {
4502                 ret = INODE_REF_MISSING;
4503
4504                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4505                 ref_index = btrfs_inode_ref_index(node, ref);
4506                 if (index != (u64)-1 && index != ref_index)
4507                         goto next_ref;
4508
4509                 if (cur + sizeof(*ref) + ref_namelen > total ||
4510                     ref_namelen > BTRFS_NAME_LEN) {
4511                         warning("root %llu INODE %s[%llu %llu] name too long",
4512                                 root->objectid,
4513                                 key->type == BTRFS_INODE_REF_KEY ?
4514                                         "REF" : "EXTREF",
4515                                 key->objectid, key->offset);
4516
4517                         if (cur + sizeof(*ref) > total)
4518                                 break;
4519                         len = min_t(u32, total - cur - sizeof(*ref),
4520                                     BTRFS_NAME_LEN);
4521                 } else {
4522                         len = ref_namelen;
4523                 }
4524
4525                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4526                                    len);
4527
4528                 if (len != namelen || strncmp(ref_namebuf, name, len))
4529                         goto next_ref;
4530
4531                 ret = 0;
4532                 goto out;
4533 next_ref:
4534                 len = sizeof(*ref) + ref_namelen;
4535                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4536                 cur += len;
4537         }
4538
4539 extref:
4540         /* Skip if not support EXTENDED_IREF feature */
4541         if (!ext_ref)
4542                 goto out;
4543
4544         btrfs_release_path(&path);
4545         btrfs_init_path(&path);
4546
4547         dir_id = key->offset;
4548         key->type = BTRFS_INODE_EXTREF_KEY;
4549         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4550
4551         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4552         if (ret) {
4553                 ret = INODE_REF_MISSING;
4554                 goto out;
4555         }
4556
4557         node = path.nodes[0];
4558         slot = path.slots[0];
4559
4560         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4561         cur = 0;
4562         total = btrfs_item_size_nr(node, slot);
4563
4564         /* Iterate all entry of INODE_EXTREF */
4565         while (cur < total) {
4566                 ret = INODE_REF_MISSING;
4567
4568                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4569                 ref_index = btrfs_inode_extref_index(node, extref);
4570                 parent = btrfs_inode_extref_parent(node, extref);
4571                 if (index != (u64)-1 && index != ref_index)
4572                         goto next_extref;
4573
4574                 if (parent != dir_id)
4575                         goto next_extref;
4576
4577                 if (ref_namelen <= BTRFS_NAME_LEN) {
4578                         len = ref_namelen;
4579                 } else {
4580                         len = BTRFS_NAME_LEN;
4581                         warning("root %llu INODE %s[%llu %llu] name too long",
4582                                 root->objectid,
4583                                 key->type == BTRFS_INODE_REF_KEY ?
4584                                         "REF" : "EXTREF",
4585                                 key->objectid, key->offset);
4586                 }
4587                 read_extent_buffer(node, ref_namebuf,
4588                                    (unsigned long)(extref + 1), len);
4589
4590                 if (len != namelen || strncmp(ref_namebuf, name, len))
4591                         goto next_extref;
4592
4593                 ret = 0;
4594                 goto out;
4595
4596 next_extref:
4597                 len = sizeof(*extref) + ref_namelen;
4598                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4599                 cur += len;
4600
4601         }
4602 out:
4603         btrfs_release_path(&path);
4604         return ret;
4605 }
4606
4607 /*
4608  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4609  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4610  *
4611  * @root:       the root of the fs/file tree
4612  * @key:        the key of the DIR_ITEM/DIR_INDEX
4613  * @size:       the st_size of the INODE_ITEM
4614  * @ext_ref:    the EXTENDED_IREF feature
4615  *
4616  * Return 0 if no error occurred.
4617  */
4618 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4619                           struct extent_buffer *node, int slot, u64 *size,
4620                           unsigned int ext_ref)
4621 {
4622         struct btrfs_dir_item *di;
4623         struct btrfs_inode_item *ii;
4624         struct btrfs_path path;
4625         struct btrfs_key location;
4626         char namebuf[BTRFS_NAME_LEN] = {0};
4627         u32 total;
4628         u32 cur = 0;
4629         u32 len;
4630         u32 name_len;
4631         u32 data_len;
4632         u8 filetype;
4633         u32 mode;
4634         u64 index;
4635         int ret;
4636         int err = 0;
4637
4638         /*
4639          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4640          * ignore index check.
4641          */
4642         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4643
4644         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4645         total = btrfs_item_size_nr(node, slot);
4646
4647         while (cur < total) {
4648                 data_len = btrfs_dir_data_len(node, di);
4649                 if (data_len)
4650                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4651                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4652                               "DIR_ITEM" : "DIR_INDEX",
4653                               key->objectid, key->offset, data_len);
4654
4655                 name_len = btrfs_dir_name_len(node, di);
4656                 if (cur + sizeof(*di) + name_len > total ||
4657                     name_len > BTRFS_NAME_LEN) {
4658                         warning("root %llu %s[%llu %llu] name too long",
4659                                 root->objectid,
4660                                 key->type == BTRFS_DIR_ITEM_KEY ?
4661                                 "DIR_ITEM" : "DIR_INDEX",
4662                                 key->objectid, key->offset);
4663
4664                         if (cur + sizeof(*di) > total)
4665                                 break;
4666                         len = min_t(u32, total - cur - sizeof(*di),
4667                                     BTRFS_NAME_LEN);
4668                 } else {
4669                         len = name_len;
4670                 }
4671                 (*size) += name_len;
4672
4673                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4674                 filetype = btrfs_dir_type(node, di);
4675
4676                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4677                     key->offset != btrfs_name_hash(namebuf, len)) {
4678                         err |= -EIO;
4679                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4680                                 root->objectid, key->objectid, key->offset,
4681                                 namebuf, len, filetype, key->offset,
4682                                 btrfs_name_hash(namebuf, len));
4683                 }
4684
4685                 btrfs_init_path(&path);
4686                 btrfs_dir_item_key_to_cpu(node, di, &location);
4687
4688                 /* Ignore related ROOT_ITEM check */
4689                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4690                         goto next;
4691
4692                 /* Check relative INODE_ITEM(existence/filetype) */
4693                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4694                 if (ret) {
4695                         err |= INODE_ITEM_MISSING;
4696                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4697                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4698                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4699                               key->offset, location.objectid, name_len,
4700                               namebuf, filetype);
4701                         goto next;
4702                 }
4703
4704                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4705                                     struct btrfs_inode_item);
4706                 mode = btrfs_inode_mode(path.nodes[0], ii);
4707
4708                 if (imode_to_type(mode) != filetype) {
4709                         err |= INODE_ITEM_MISMATCH;
4710                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4711                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4712                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4713                               key->offset, name_len, namebuf, filetype);
4714                 }
4715
4716                 /* Check relative INODE_REF/INODE_EXTREF */
4717                 location.type = BTRFS_INODE_REF_KEY;
4718                 location.offset = key->objectid;
4719                 ret = find_inode_ref(root, &location, namebuf, len,
4720                                        index, ext_ref);
4721                 err |= ret;
4722                 if (ret & INODE_REF_MISSING)
4723                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4724                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4725                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4726                               key->offset, name_len, namebuf, filetype);
4727
4728 next:
4729                 btrfs_release_path(&path);
4730                 len = sizeof(*di) + name_len + data_len;
4731                 di = (struct btrfs_dir_item *)((char *)di + len);
4732                 cur += len;
4733
4734                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4735                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4736                               root->objectid, key->objectid, key->offset);
4737                         break;
4738                 }
4739         }
4740
4741         return err;
4742 }
4743
4744 /*
4745  * Check file extent datasum/hole, update the size of the file extents,
4746  * check and update the last offset of the file extent.
4747  *
4748  * @root:       the root of fs/file tree.
4749  * @fkey:       the key of the file extent.
4750  * @nodatasum:  INODE_NODATASUM feature.
4751  * @size:       the sum of all EXTENT_DATA items size for this inode.
4752  * @end:        the offset of the last extent.
4753  *
4754  * Return 0 if no error occurred.
4755  */
4756 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4757                              struct extent_buffer *node, int slot,
4758                              unsigned int nodatasum, u64 *size, u64 *end)
4759 {
4760         struct btrfs_file_extent_item *fi;
4761         u64 disk_bytenr;
4762         u64 disk_num_bytes;
4763         u64 extent_num_bytes;
4764         u64 extent_offset;
4765         u64 csum_found;         /* In byte size, sectorsize aligned */
4766         u64 search_start;       /* Logical range start we search for csum */
4767         u64 search_len;         /* Logical range len we search for csum */
4768         unsigned int extent_type;
4769         unsigned int is_hole;
4770         int compressed = 0;
4771         int ret;
4772         int err = 0;
4773
4774         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4775
4776         /* Check inline extent */
4777         extent_type = btrfs_file_extent_type(node, fi);
4778         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4779                 struct btrfs_item *e = btrfs_item_nr(slot);
4780                 u32 item_inline_len;
4781
4782                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4783                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4784                 compressed = btrfs_file_extent_compression(node, fi);
4785                 if (extent_num_bytes == 0) {
4786                         error(
4787                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4788                                 root->objectid, fkey->objectid, fkey->offset);
4789                         err |= FILE_EXTENT_ERROR;
4790                 }
4791                 if (!compressed && extent_num_bytes != item_inline_len) {
4792                         error(
4793                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4794                                 root->objectid, fkey->objectid, fkey->offset,
4795                                 extent_num_bytes, item_inline_len);
4796                         err |= FILE_EXTENT_ERROR;
4797                 }
4798                 *end += extent_num_bytes;
4799                 *size += extent_num_bytes;
4800                 return err;
4801         }
4802
4803         /* Check extent type */
4804         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4805                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4806                 err |= FILE_EXTENT_ERROR;
4807                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4808                       root->objectid, fkey->objectid, fkey->offset);
4809                 return err;
4810         }
4811
4812         /* Check REG_EXTENT/PREALLOC_EXTENT */
4813         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4814         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4815         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4816         extent_offset = btrfs_file_extent_offset(node, fi);
4817         compressed = btrfs_file_extent_compression(node, fi);
4818         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4819
4820         /*
4821          * Check EXTENT_DATA csum
4822          *
4823          * For plain (uncompressed) extent, we should only check the range
4824          * we're referring to, as it's possible that part of prealloc extent
4825          * has been written, and has csum:
4826          *
4827          * |<--- Original large preallocated extent A ---->|
4828          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4829          *      No csum                         Has csum
4830          *
4831          * For compressed extent, we should check the whole range.
4832          */
4833         if (!compressed) {
4834                 search_start = disk_bytenr + extent_offset;
4835                 search_len = extent_num_bytes;
4836         } else {
4837                 search_start = disk_bytenr;
4838                 search_len = disk_num_bytes;
4839         }
4840         ret = count_csum_range(root, search_start, search_len, &csum_found);
4841         if (csum_found > 0 && nodatasum) {
4842                 err |= ODD_CSUM_ITEM;
4843                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4844                       root->objectid, fkey->objectid, fkey->offset);
4845         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4846                    !is_hole && (ret < 0 || csum_found < search_len)) {
4847                 err |= CSUM_ITEM_MISSING;
4848                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4849                       root->objectid, fkey->objectid, fkey->offset,
4850                       csum_found, search_len);
4851         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4852                 err |= ODD_CSUM_ITEM;
4853                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4854                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4855         }
4856
4857         /* Check EXTENT_DATA hole */
4858         if (!no_holes && *end != fkey->offset) {
4859                 err |= FILE_EXTENT_ERROR;
4860                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4861                       root->objectid, fkey->objectid, fkey->offset);
4862         }
4863
4864         *end += extent_num_bytes;
4865         if (!is_hole)
4866                 *size += extent_num_bytes;
4867
4868         return err;
4869 }
4870
4871 /*
4872  * Check INODE_ITEM and related ITEMs (the same inode number)
4873  * 1. check link count
4874  * 2. check inode ref/extref
4875  * 3. check dir item/index
4876  *
4877  * @ext_ref:    the EXTENDED_IREF feature
4878  *
4879  * Return 0 if no error occurred.
4880  * Return >0 for error or hit the traversal is done(by error bitmap)
4881  */
4882 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4883                             unsigned int ext_ref)
4884 {
4885         struct extent_buffer *node;
4886         struct btrfs_inode_item *ii;
4887         struct btrfs_key key;
4888         u64 inode_id;
4889         u32 mode;
4890         u64 nlink;
4891         u64 nbytes;
4892         u64 isize;
4893         u64 size = 0;
4894         u64 refs = 0;
4895         u64 extent_end = 0;
4896         u64 extent_size = 0;
4897         unsigned int dir;
4898         unsigned int nodatasum;
4899         int slot;
4900         int ret;
4901         int err = 0;
4902
4903         node = path->nodes[0];
4904         slot = path->slots[0];
4905
4906         btrfs_item_key_to_cpu(node, &key, slot);
4907         inode_id = key.objectid;
4908
4909         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4910                 ret = btrfs_next_item(root, path);
4911                 if (ret > 0)
4912                         err |= LAST_ITEM;
4913                 return err;
4914         }
4915
4916         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4917         isize = btrfs_inode_size(node, ii);
4918         nbytes = btrfs_inode_nbytes(node, ii);
4919         mode = btrfs_inode_mode(node, ii);
4920         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4921         nlink = btrfs_inode_nlink(node, ii);
4922         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4923
4924         while (1) {
4925                 ret = btrfs_next_item(root, path);
4926                 if (ret < 0) {
4927                         /* out will fill 'err' rusing current statistics */
4928                         goto out;
4929                 } else if (ret > 0) {
4930                         err |= LAST_ITEM;
4931                         goto out;
4932                 }
4933
4934                 node = path->nodes[0];
4935                 slot = path->slots[0];
4936                 btrfs_item_key_to_cpu(node, &key, slot);
4937                 if (key.objectid != inode_id)
4938                         goto out;
4939
4940                 switch (key.type) {
4941                 case BTRFS_INODE_REF_KEY:
4942                         ret = check_inode_ref(root, &key, node, slot, &refs,
4943                                               mode);
4944                         err |= ret;
4945                         break;
4946                 case BTRFS_INODE_EXTREF_KEY:
4947                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4948                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4949                                         root->objectid, key.objectid,
4950                                         key.offset);
4951                         ret = check_inode_extref(root, &key, node, slot, &refs,
4952                                                  mode);
4953                         err |= ret;
4954                         break;
4955                 case BTRFS_DIR_ITEM_KEY:
4956                 case BTRFS_DIR_INDEX_KEY:
4957                         if (!dir) {
4958                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4959                                         root->objectid, inode_id,
4960                                         imode_to_type(mode), key.objectid,
4961                                         key.offset);
4962                         }
4963                         ret = check_dir_item(root, &key, node, slot, &size,
4964                                              ext_ref);
4965                         err |= ret;
4966                         break;
4967                 case BTRFS_EXTENT_DATA_KEY:
4968                         if (dir) {
4969                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4970                                         root->objectid, inode_id, key.objectid,
4971                                         key.offset);
4972                         }
4973                         ret = check_file_extent(root, &key, node, slot,
4974                                                 nodatasum, &extent_size,
4975                                                 &extent_end);
4976                         err |= ret;
4977                         break;
4978                 case BTRFS_XATTR_ITEM_KEY:
4979                         break;
4980                 default:
4981                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4982                               key.objectid, key.type, key.offset);
4983                 }
4984         }
4985
4986 out:
4987         /* verify INODE_ITEM nlink/isize/nbytes */
4988         if (dir) {
4989                 if (nlink != 1) {
4990                         err |= LINK_COUNT_ERROR;
4991                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4992                               root->objectid, inode_id, nlink);
4993                 }
4994
4995                 /*
4996                  * Just a warning, as dir inode nbytes is just an
4997                  * instructive value.
4998                  */
4999                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5000                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5001                                 root->objectid, inode_id,
5002                                 root->fs_info->nodesize);
5003                 }
5004
5005                 if (isize != size) {
5006                         err |= ISIZE_ERROR;
5007                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5008                               root->objectid, inode_id, isize, size);
5009                 }
5010         } else {
5011                 if (nlink != refs) {
5012                         err |= LINK_COUNT_ERROR;
5013                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5014                               root->objectid, inode_id, nlink, refs);
5015                 } else if (!nlink) {
5016                         err |= ORPHAN_ITEM;
5017                 }
5018
5019                 if (!nbytes && !no_holes && extent_end < isize) {
5020                         err |= NBYTES_ERROR;
5021                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5022                               root->objectid, inode_id, isize);
5023                 }
5024
5025                 if (nbytes != extent_size) {
5026                         err |= NBYTES_ERROR;
5027                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5028                               root->objectid, inode_id, nbytes, extent_size);
5029                 }
5030         }
5031
5032         return err;
5033 }
5034
5035 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5036 {
5037         struct btrfs_path path;
5038         struct btrfs_key key;
5039         int err = 0;
5040         int ret;
5041
5042         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5043         key.type = BTRFS_INODE_ITEM_KEY;
5044         key.offset = 0;
5045
5046         /* For root being dropped, we don't need to check first inode */
5047         if (btrfs_root_refs(&root->root_item) == 0 &&
5048             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5049             key.objectid)
5050                 return 0;
5051
5052         btrfs_init_path(&path);
5053
5054         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5055         if (ret < 0)
5056                 goto out;
5057         if (ret > 0) {
5058                 ret = 0;
5059                 err |= INODE_ITEM_MISSING;
5060                 error("first inode item of root %llu is missing",
5061                       root->objectid);
5062         }
5063
5064         err |= check_inode_item(root, &path, ext_ref);
5065         err &= ~LAST_ITEM;
5066         if (err && !ret)
5067                 ret = -EIO;
5068 out:
5069         btrfs_release_path(&path);
5070         return ret;
5071 }
5072
5073 /*
5074  * Iterate all item on the tree and call check_inode_item() to check.
5075  *
5076  * @root:       the root of the tree to be checked.
5077  * @ext_ref:    the EXTENDED_IREF feature
5078  *
5079  * Return 0 if no error found.
5080  * Return <0 for error.
5081  */
5082 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5083 {
5084         struct btrfs_path path;
5085         struct node_refs nrefs;
5086         struct btrfs_root_item *root_item = &root->root_item;
5087         int ret;
5088         int level;
5089         int err = 0;
5090
5091         /*
5092          * We need to manually check the first inode item(256)
5093          * As the following traversal function will only start from
5094          * the first inode item in the leaf, if inode item(256) is missing
5095          * we will just skip it forever.
5096          */
5097         ret = check_fs_first_inode(root, ext_ref);
5098         if (ret < 0)
5099                 return ret;
5100
5101         memset(&nrefs, 0, sizeof(nrefs));
5102         level = btrfs_header_level(root->node);
5103         btrfs_init_path(&path);
5104
5105         if (btrfs_root_refs(root_item) > 0 ||
5106             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5107                 path.nodes[level] = root->node;
5108                 path.slots[level] = 0;
5109                 extent_buffer_get(root->node);
5110         } else {
5111                 struct btrfs_key key;
5112
5113                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5114                 level = root_item->drop_level;
5115                 path.lowest_level = level;
5116                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5117                 if (ret < 0)
5118                         goto out;
5119                 ret = 0;
5120         }
5121
5122         while (1) {
5123                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5124                 err |= !!ret;
5125
5126                 /* if ret is negative, walk shall stop */
5127                 if (ret < 0) {
5128                         ret = err;
5129                         break;
5130                 }
5131
5132                 ret = walk_up_tree_v2(root, &path, &level);
5133                 if (ret != 0) {
5134                         /* Normal exit, reset ret to err */
5135                         ret = err;
5136                         break;
5137                 }
5138         }
5139
5140 out:
5141         btrfs_release_path(&path);
5142         return ret;
5143 }
5144
5145 /*
5146  * Find the relative ref for root_ref and root_backref.
5147  *
5148  * @root:       the root of the root tree.
5149  * @ref_key:    the key of the root ref.
5150  *
5151  * Return 0 if no error occurred.
5152  */
5153 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5154                           struct extent_buffer *node, int slot)
5155 {
5156         struct btrfs_path path;
5157         struct btrfs_key key;
5158         struct btrfs_root_ref *ref;
5159         struct btrfs_root_ref *backref;
5160         char ref_name[BTRFS_NAME_LEN] = {0};
5161         char backref_name[BTRFS_NAME_LEN] = {0};
5162         u64 ref_dirid;
5163         u64 ref_seq;
5164         u32 ref_namelen;
5165         u64 backref_dirid;
5166         u64 backref_seq;
5167         u32 backref_namelen;
5168         u32 len;
5169         int ret;
5170         int err = 0;
5171
5172         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5173         ref_dirid = btrfs_root_ref_dirid(node, ref);
5174         ref_seq = btrfs_root_ref_sequence(node, ref);
5175         ref_namelen = btrfs_root_ref_name_len(node, ref);
5176
5177         if (ref_namelen <= BTRFS_NAME_LEN) {
5178                 len = ref_namelen;
5179         } else {
5180                 len = BTRFS_NAME_LEN;
5181                 warning("%s[%llu %llu] ref_name too long",
5182                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5183                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5184                         ref_key->offset);
5185         }
5186         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5187
5188         /* Find relative root_ref */
5189         key.objectid = ref_key->offset;
5190         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5191         key.offset = ref_key->objectid;
5192
5193         btrfs_init_path(&path);
5194         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5195         if (ret) {
5196                 err |= ROOT_REF_MISSING;
5197                 error("%s[%llu %llu] couldn't find relative ref",
5198                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5199                       "ROOT_REF" : "ROOT_BACKREF",
5200                       ref_key->objectid, ref_key->offset);
5201                 goto out;
5202         }
5203
5204         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5205                                  struct btrfs_root_ref);
5206         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5207         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5208         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5209
5210         if (backref_namelen <= BTRFS_NAME_LEN) {
5211                 len = backref_namelen;
5212         } else {
5213                 len = BTRFS_NAME_LEN;
5214                 warning("%s[%llu %llu] ref_name too long",
5215                         key.type == BTRFS_ROOT_REF_KEY ?
5216                         "ROOT_REF" : "ROOT_BACKREF",
5217                         key.objectid, key.offset);
5218         }
5219         read_extent_buffer(path.nodes[0], backref_name,
5220                            (unsigned long)(backref + 1), len);
5221
5222         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5223             ref_namelen != backref_namelen ||
5224             strncmp(ref_name, backref_name, len)) {
5225                 err |= ROOT_REF_MISMATCH;
5226                 error("%s[%llu %llu] mismatch relative ref",
5227                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5228                       "ROOT_REF" : "ROOT_BACKREF",
5229                       ref_key->objectid, ref_key->offset);
5230         }
5231 out:
5232         btrfs_release_path(&path);
5233         return err;
5234 }
5235
5236 /*
5237  * Check all fs/file tree in low_memory mode.
5238  *
5239  * 1. for fs tree root item, call check_fs_root_v2()
5240  * 2. for fs tree root ref/backref, call check_root_ref()
5241  *
5242  * Return 0 if no error occurred.
5243  */
5244 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5245 {
5246         struct btrfs_root *tree_root = fs_info->tree_root;
5247         struct btrfs_root *cur_root = NULL;
5248         struct btrfs_path path;
5249         struct btrfs_key key;
5250         struct extent_buffer *node;
5251         unsigned int ext_ref;
5252         int slot;
5253         int ret;
5254         int err = 0;
5255
5256         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5257
5258         btrfs_init_path(&path);
5259         key.objectid = BTRFS_FS_TREE_OBJECTID;
5260         key.offset = 0;
5261         key.type = BTRFS_ROOT_ITEM_KEY;
5262
5263         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5264         if (ret < 0) {
5265                 err = ret;
5266                 goto out;
5267         } else if (ret > 0) {
5268                 err = -ENOENT;
5269                 goto out;
5270         }
5271
5272         while (1) {
5273                 node = path.nodes[0];
5274                 slot = path.slots[0];
5275                 btrfs_item_key_to_cpu(node, &key, slot);
5276                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5277                         goto out;
5278                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5279                     fs_root_objectid(key.objectid)) {
5280                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5281                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5282                                                                        &key);
5283                         } else {
5284                                 key.offset = (u64)-1;
5285                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5286                         }
5287
5288                         if (IS_ERR(cur_root)) {
5289                                 error("Fail to read fs/subvol tree: %lld",
5290                                       key.objectid);
5291                                 err = -EIO;
5292                                 goto next;
5293                         }
5294
5295                         ret = check_fs_root_v2(cur_root, ext_ref);
5296                         err |= ret;
5297
5298                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5299                                 btrfs_free_fs_root(cur_root);
5300                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5301                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5302                         ret = check_root_ref(tree_root, &key, node, slot);
5303                         err |= ret;
5304                 }
5305 next:
5306                 ret = btrfs_next_item(tree_root, &path);
5307                 if (ret > 0)
5308                         goto out;
5309                 if (ret < 0) {
5310                         err = ret;
5311                         goto out;
5312                 }
5313         }
5314
5315 out:
5316         btrfs_release_path(&path);
5317         return err;
5318 }
5319
5320 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5321 {
5322         struct list_head *cur = rec->backrefs.next;
5323         struct extent_backref *back;
5324         struct tree_backref *tback;
5325         struct data_backref *dback;
5326         u64 found = 0;
5327         int err = 0;
5328
5329         while(cur != &rec->backrefs) {
5330                 back = to_extent_backref(cur);
5331                 cur = cur->next;
5332                 if (!back->found_extent_tree) {
5333                         err = 1;
5334                         if (!print_errs)
5335                                 goto out;
5336                         if (back->is_data) {
5337                                 dback = to_data_backref(back);
5338                                 fprintf(stderr, "Backref %llu %s %llu"
5339                                         " owner %llu offset %llu num_refs %lu"
5340                                         " not found in extent tree\n",
5341                                         (unsigned long long)rec->start,
5342                                         back->full_backref ?
5343                                         "parent" : "root",
5344                                         back->full_backref ?
5345                                         (unsigned long long)dback->parent:
5346                                         (unsigned long long)dback->root,
5347                                         (unsigned long long)dback->owner,
5348                                         (unsigned long long)dback->offset,
5349                                         (unsigned long)dback->num_refs);
5350                         } else {
5351                                 tback = to_tree_backref(back);
5352                                 fprintf(stderr, "Backref %llu parent %llu"
5353                                         " root %llu not found in extent tree\n",
5354                                         (unsigned long long)rec->start,
5355                                         (unsigned long long)tback->parent,
5356                                         (unsigned long long)tback->root);
5357                         }
5358                 }
5359                 if (!back->is_data && !back->found_ref) {
5360                         err = 1;
5361                         if (!print_errs)
5362                                 goto out;
5363                         tback = to_tree_backref(back);
5364                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5365                                 (unsigned long long)rec->start,
5366                                 back->full_backref ? "parent" : "root",
5367                                 back->full_backref ?
5368                                 (unsigned long long)tback->parent :
5369                                 (unsigned long long)tback->root, back);
5370                 }
5371                 if (back->is_data) {
5372                         dback = to_data_backref(back);
5373                         if (dback->found_ref != dback->num_refs) {
5374                                 err = 1;
5375                                 if (!print_errs)
5376                                         goto out;
5377                                 fprintf(stderr, "Incorrect local backref count"
5378                                         " on %llu %s %llu owner %llu"
5379                                         " offset %llu found %u wanted %u back %p\n",
5380                                         (unsigned long long)rec->start,
5381                                         back->full_backref ?
5382                                         "parent" : "root",
5383                                         back->full_backref ?
5384                                         (unsigned long long)dback->parent:
5385                                         (unsigned long long)dback->root,
5386                                         (unsigned long long)dback->owner,
5387                                         (unsigned long long)dback->offset,
5388                                         dback->found_ref, dback->num_refs, back);
5389                         }
5390                         if (dback->disk_bytenr != rec->start) {
5391                                 err = 1;
5392                                 if (!print_errs)
5393                                         goto out;
5394                                 fprintf(stderr, "Backref disk bytenr does not"
5395                                         " match extent record, bytenr=%llu, "
5396                                         "ref bytenr=%llu\n",
5397                                         (unsigned long long)rec->start,
5398                                         (unsigned long long)dback->disk_bytenr);
5399                         }
5400
5401                         if (dback->bytes != rec->nr) {
5402                                 err = 1;
5403                                 if (!print_errs)
5404                                         goto out;
5405                                 fprintf(stderr, "Backref bytes do not match "
5406                                         "extent backref, bytenr=%llu, ref "
5407                                         "bytes=%llu, backref bytes=%llu\n",
5408                                         (unsigned long long)rec->start,
5409                                         (unsigned long long)rec->nr,
5410                                         (unsigned long long)dback->bytes);
5411                         }
5412                 }
5413                 if (!back->is_data) {
5414                         found += 1;
5415                 } else {
5416                         dback = to_data_backref(back);
5417                         found += dback->found_ref;
5418                 }
5419         }
5420         if (found != rec->refs) {
5421                 err = 1;
5422                 if (!print_errs)
5423                         goto out;
5424                 fprintf(stderr, "Incorrect global backref count "
5425                         "on %llu found %llu wanted %llu\n",
5426                         (unsigned long long)rec->start,
5427                         (unsigned long long)found,
5428                         (unsigned long long)rec->refs);
5429         }
5430 out:
5431         return err;
5432 }
5433
5434 static int free_all_extent_backrefs(struct extent_record *rec)
5435 {
5436         struct extent_backref *back;
5437         struct list_head *cur;
5438         while (!list_empty(&rec->backrefs)) {
5439                 cur = rec->backrefs.next;
5440                 back = to_extent_backref(cur);
5441                 list_del(cur);
5442                 free(back);
5443         }
5444         return 0;
5445 }
5446
5447 static void free_extent_record_cache(struct cache_tree *extent_cache)
5448 {
5449         struct cache_extent *cache;
5450         struct extent_record *rec;
5451
5452         while (1) {
5453                 cache = first_cache_extent(extent_cache);
5454                 if (!cache)
5455                         break;
5456                 rec = container_of(cache, struct extent_record, cache);
5457                 remove_cache_extent(extent_cache, cache);
5458                 free_all_extent_backrefs(rec);
5459                 free(rec);
5460         }
5461 }
5462
5463 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5464                                  struct extent_record *rec)
5465 {
5466         if (rec->content_checked && rec->owner_ref_checked &&
5467             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5468             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5469             !rec->bad_full_backref && !rec->crossing_stripes &&
5470             !rec->wrong_chunk_type) {
5471                 remove_cache_extent(extent_cache, &rec->cache);
5472                 free_all_extent_backrefs(rec);
5473                 list_del_init(&rec->list);
5474                 free(rec);
5475         }
5476         return 0;
5477 }
5478
5479 static int check_owner_ref(struct btrfs_root *root,
5480                             struct extent_record *rec,
5481                             struct extent_buffer *buf)
5482 {
5483         struct extent_backref *node;
5484         struct tree_backref *back;
5485         struct btrfs_root *ref_root;
5486         struct btrfs_key key;
5487         struct btrfs_path path;
5488         struct extent_buffer *parent;
5489         int level;
5490         int found = 0;
5491         int ret;
5492
5493         list_for_each_entry(node, &rec->backrefs, list) {
5494                 if (node->is_data)
5495                         continue;
5496                 if (!node->found_ref)
5497                         continue;
5498                 if (node->full_backref)
5499                         continue;
5500                 back = to_tree_backref(node);
5501                 if (btrfs_header_owner(buf) == back->root)
5502                         return 0;
5503         }
5504         BUG_ON(rec->is_root);
5505
5506         /* try to find the block by search corresponding fs tree */
5507         key.objectid = btrfs_header_owner(buf);
5508         key.type = BTRFS_ROOT_ITEM_KEY;
5509         key.offset = (u64)-1;
5510
5511         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5512         if (IS_ERR(ref_root))
5513                 return 1;
5514
5515         level = btrfs_header_level(buf);
5516         if (level == 0)
5517                 btrfs_item_key_to_cpu(buf, &key, 0);
5518         else
5519                 btrfs_node_key_to_cpu(buf, &key, 0);
5520
5521         btrfs_init_path(&path);
5522         path.lowest_level = level + 1;
5523         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5524         if (ret < 0)
5525                 return 0;
5526
5527         parent = path.nodes[level + 1];
5528         if (parent && buf->start == btrfs_node_blockptr(parent,
5529                                                         path.slots[level + 1]))
5530                 found = 1;
5531
5532         btrfs_release_path(&path);
5533         return found ? 0 : 1;
5534 }
5535
5536 static int is_extent_tree_record(struct extent_record *rec)
5537 {
5538         struct list_head *cur = rec->backrefs.next;
5539         struct extent_backref *node;
5540         struct tree_backref *back;
5541         int is_extent = 0;
5542
5543         while(cur != &rec->backrefs) {
5544                 node = to_extent_backref(cur);
5545                 cur = cur->next;
5546                 if (node->is_data)
5547                         return 0;
5548                 back = to_tree_backref(node);
5549                 if (node->full_backref)
5550                         return 0;
5551                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5552                         is_extent = 1;
5553         }
5554         return is_extent;
5555 }
5556
5557
5558 static int record_bad_block_io(struct btrfs_fs_info *info,
5559                                struct cache_tree *extent_cache,
5560                                u64 start, u64 len)
5561 {
5562         struct extent_record *rec;
5563         struct cache_extent *cache;
5564         struct btrfs_key key;
5565
5566         cache = lookup_cache_extent(extent_cache, start, len);
5567         if (!cache)
5568                 return 0;
5569
5570         rec = container_of(cache, struct extent_record, cache);
5571         if (!is_extent_tree_record(rec))
5572                 return 0;
5573
5574         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5575         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5576 }
5577
5578 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5579                        struct extent_buffer *buf, int slot)
5580 {
5581         if (btrfs_header_level(buf)) {
5582                 struct btrfs_key_ptr ptr1, ptr2;
5583
5584                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5585                                    sizeof(struct btrfs_key_ptr));
5586                 read_extent_buffer(buf, &ptr2,
5587                                    btrfs_node_key_ptr_offset(slot + 1),
5588                                    sizeof(struct btrfs_key_ptr));
5589                 write_extent_buffer(buf, &ptr1,
5590                                     btrfs_node_key_ptr_offset(slot + 1),
5591                                     sizeof(struct btrfs_key_ptr));
5592                 write_extent_buffer(buf, &ptr2,
5593                                     btrfs_node_key_ptr_offset(slot),
5594                                     sizeof(struct btrfs_key_ptr));
5595                 if (slot == 0) {
5596                         struct btrfs_disk_key key;
5597                         btrfs_node_key(buf, &key, 0);
5598                         btrfs_fixup_low_keys(root, path, &key,
5599                                              btrfs_header_level(buf) + 1);
5600                 }
5601         } else {
5602                 struct btrfs_item *item1, *item2;
5603                 struct btrfs_key k1, k2;
5604                 char *item1_data, *item2_data;
5605                 u32 item1_offset, item2_offset, item1_size, item2_size;
5606
5607                 item1 = btrfs_item_nr(slot);
5608                 item2 = btrfs_item_nr(slot + 1);
5609                 btrfs_item_key_to_cpu(buf, &k1, slot);
5610                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5611                 item1_offset = btrfs_item_offset(buf, item1);
5612                 item2_offset = btrfs_item_offset(buf, item2);
5613                 item1_size = btrfs_item_size(buf, item1);
5614                 item2_size = btrfs_item_size(buf, item2);
5615
5616                 item1_data = malloc(item1_size);
5617                 if (!item1_data)
5618                         return -ENOMEM;
5619                 item2_data = malloc(item2_size);
5620                 if (!item2_data) {
5621                         free(item1_data);
5622                         return -ENOMEM;
5623                 }
5624
5625                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5626                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5627
5628                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5629                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5630                 free(item1_data);
5631                 free(item2_data);
5632
5633                 btrfs_set_item_offset(buf, item1, item2_offset);
5634                 btrfs_set_item_offset(buf, item2, item1_offset);
5635                 btrfs_set_item_size(buf, item1, item2_size);
5636                 btrfs_set_item_size(buf, item2, item1_size);
5637
5638                 path->slots[0] = slot;
5639                 btrfs_set_item_key_unsafe(root, path, &k2);
5640                 path->slots[0] = slot + 1;
5641                 btrfs_set_item_key_unsafe(root, path, &k1);
5642         }
5643         return 0;
5644 }
5645
5646 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5647 {
5648         struct extent_buffer *buf;
5649         struct btrfs_key k1, k2;
5650         int i;
5651         int level = path->lowest_level;
5652         int ret = -EIO;
5653
5654         buf = path->nodes[level];
5655         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5656                 if (level) {
5657                         btrfs_node_key_to_cpu(buf, &k1, i);
5658                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5659                 } else {
5660                         btrfs_item_key_to_cpu(buf, &k1, i);
5661                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5662                 }
5663                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5664                         continue;
5665                 ret = swap_values(root, path, buf, i);
5666                 if (ret)
5667                         break;
5668                 btrfs_mark_buffer_dirty(buf);
5669                 i = 0;
5670         }
5671         return ret;
5672 }
5673
5674 static int delete_bogus_item(struct btrfs_root *root,
5675                              struct btrfs_path *path,
5676                              struct extent_buffer *buf, int slot)
5677 {
5678         struct btrfs_key key;
5679         int nritems = btrfs_header_nritems(buf);
5680
5681         btrfs_item_key_to_cpu(buf, &key, slot);
5682
5683         /* These are all the keys we can deal with missing. */
5684         if (key.type != BTRFS_DIR_INDEX_KEY &&
5685             key.type != BTRFS_EXTENT_ITEM_KEY &&
5686             key.type != BTRFS_METADATA_ITEM_KEY &&
5687             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5688             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5689                 return -1;
5690
5691         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5692                (unsigned long long)key.objectid, key.type,
5693                (unsigned long long)key.offset, slot, buf->start);
5694         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5695                               btrfs_item_nr_offset(slot + 1),
5696                               sizeof(struct btrfs_item) *
5697                               (nritems - slot - 1));
5698         btrfs_set_header_nritems(buf, nritems - 1);
5699         if (slot == 0) {
5700                 struct btrfs_disk_key disk_key;
5701
5702                 btrfs_item_key(buf, &disk_key, 0);
5703                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5704         }
5705         btrfs_mark_buffer_dirty(buf);
5706         return 0;
5707 }
5708
5709 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5710 {
5711         struct extent_buffer *buf;
5712         int i;
5713         int ret = 0;
5714
5715         /* We should only get this for leaves */
5716         BUG_ON(path->lowest_level);
5717         buf = path->nodes[0];
5718 again:
5719         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5720                 unsigned int shift = 0, offset;
5721
5722                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5723                     BTRFS_LEAF_DATA_SIZE(root)) {
5724                         if (btrfs_item_end_nr(buf, i) >
5725                             BTRFS_LEAF_DATA_SIZE(root)) {
5726                                 ret = delete_bogus_item(root, path, buf, i);
5727                                 if (!ret)
5728                                         goto again;
5729                                 fprintf(stderr, "item is off the end of the "
5730                                         "leaf, can't fix\n");
5731                                 ret = -EIO;
5732                                 break;
5733                         }
5734                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5735                                 btrfs_item_end_nr(buf, i);
5736                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5737                            btrfs_item_offset_nr(buf, i - 1)) {
5738                         if (btrfs_item_end_nr(buf, i) >
5739                             btrfs_item_offset_nr(buf, i - 1)) {
5740                                 ret = delete_bogus_item(root, path, buf, i);
5741                                 if (!ret)
5742                                         goto again;
5743                                 fprintf(stderr, "items overlap, can't fix\n");
5744                                 ret = -EIO;
5745                                 break;
5746                         }
5747                         shift = btrfs_item_offset_nr(buf, i - 1) -
5748                                 btrfs_item_end_nr(buf, i);
5749                 }
5750                 if (!shift)
5751                         continue;
5752
5753                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5754                        i, shift, (unsigned long long)buf->start);
5755                 offset = btrfs_item_offset_nr(buf, i);
5756                 memmove_extent_buffer(buf,
5757                                       btrfs_leaf_data(buf) + offset + shift,
5758                                       btrfs_leaf_data(buf) + offset,
5759                                       btrfs_item_size_nr(buf, i));
5760                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5761                                       offset + shift);
5762                 btrfs_mark_buffer_dirty(buf);
5763         }
5764
5765         /*
5766          * We may have moved things, in which case we want to exit so we don't
5767          * write those changes out.  Once we have proper abort functionality in
5768          * progs this can be changed to something nicer.
5769          */
5770         BUG_ON(ret);
5771         return ret;
5772 }
5773
5774 /*
5775  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5776  * then just return -EIO.
5777  */
5778 static int try_to_fix_bad_block(struct btrfs_root *root,
5779                                 struct extent_buffer *buf,
5780                                 enum btrfs_tree_block_status status)
5781 {
5782         struct btrfs_trans_handle *trans;
5783         struct ulist *roots;
5784         struct ulist_node *node;
5785         struct btrfs_root *search_root;
5786         struct btrfs_path path;
5787         struct ulist_iterator iter;
5788         struct btrfs_key root_key, key;
5789         int ret;
5790
5791         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5792             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5793                 return -EIO;
5794
5795         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5796         if (ret)
5797                 return -EIO;
5798
5799         btrfs_init_path(&path);
5800         ULIST_ITER_INIT(&iter);
5801         while ((node = ulist_next(roots, &iter))) {
5802                 root_key.objectid = node->val;
5803                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5804                 root_key.offset = (u64)-1;
5805
5806                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5807                 if (IS_ERR(root)) {
5808                         ret = -EIO;
5809                         break;
5810                 }
5811
5812
5813                 trans = btrfs_start_transaction(search_root, 0);
5814                 if (IS_ERR(trans)) {
5815                         ret = PTR_ERR(trans);
5816                         break;
5817                 }
5818
5819                 path.lowest_level = btrfs_header_level(buf);
5820                 path.skip_check_block = 1;
5821                 if (path.lowest_level)
5822                         btrfs_node_key_to_cpu(buf, &key, 0);
5823                 else
5824                         btrfs_item_key_to_cpu(buf, &key, 0);
5825                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5826                 if (ret) {
5827                         ret = -EIO;
5828                         btrfs_commit_transaction(trans, search_root);
5829                         break;
5830                 }
5831                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5832                         ret = fix_key_order(search_root, &path);
5833                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5834                         ret = fix_item_offset(search_root, &path);
5835                 if (ret) {
5836                         btrfs_commit_transaction(trans, search_root);
5837                         break;
5838                 }
5839                 btrfs_release_path(&path);
5840                 btrfs_commit_transaction(trans, search_root);
5841         }
5842         ulist_free(roots);
5843         btrfs_release_path(&path);
5844         return ret;
5845 }
5846
5847 static int check_block(struct btrfs_root *root,
5848                        struct cache_tree *extent_cache,
5849                        struct extent_buffer *buf, u64 flags)
5850 {
5851         struct extent_record *rec;
5852         struct cache_extent *cache;
5853         struct btrfs_key key;
5854         enum btrfs_tree_block_status status;
5855         int ret = 0;
5856         int level;
5857
5858         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5859         if (!cache)
5860                 return 1;
5861         rec = container_of(cache, struct extent_record, cache);
5862         rec->generation = btrfs_header_generation(buf);
5863
5864         level = btrfs_header_level(buf);
5865         if (btrfs_header_nritems(buf) > 0) {
5866
5867                 if (level == 0)
5868                         btrfs_item_key_to_cpu(buf, &key, 0);
5869                 else
5870                         btrfs_node_key_to_cpu(buf, &key, 0);
5871
5872                 rec->info_objectid = key.objectid;
5873         }
5874         rec->info_level = level;
5875
5876         if (btrfs_is_leaf(buf))
5877                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5878         else
5879                 status = btrfs_check_node(root, &rec->parent_key, buf);
5880
5881         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5882                 if (repair)
5883                         status = try_to_fix_bad_block(root, buf, status);
5884                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5885                         ret = -EIO;
5886                         fprintf(stderr, "bad block %llu\n",
5887                                 (unsigned long long)buf->start);
5888                 } else {
5889                         /*
5890                          * Signal to callers we need to start the scan over
5891                          * again since we'll have cowed blocks.
5892                          */
5893                         ret = -EAGAIN;
5894                 }
5895         } else {
5896                 rec->content_checked = 1;
5897                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5898                         rec->owner_ref_checked = 1;
5899                 else {
5900                         ret = check_owner_ref(root, rec, buf);
5901                         if (!ret)
5902                                 rec->owner_ref_checked = 1;
5903                 }
5904         }
5905         if (!ret)
5906                 maybe_free_extent_rec(extent_cache, rec);
5907         return ret;
5908 }
5909
5910 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5911                                                 u64 parent, u64 root)
5912 {
5913         struct list_head *cur = rec->backrefs.next;
5914         struct extent_backref *node;
5915         struct tree_backref *back;
5916
5917         while(cur != &rec->backrefs) {
5918                 node = to_extent_backref(cur);
5919                 cur = cur->next;
5920                 if (node->is_data)
5921                         continue;
5922                 back = to_tree_backref(node);
5923                 if (parent > 0) {
5924                         if (!node->full_backref)
5925                                 continue;
5926                         if (parent == back->parent)
5927                                 return back;
5928                 } else {
5929                         if (node->full_backref)
5930                                 continue;
5931                         if (back->root == root)
5932                                 return back;
5933                 }
5934         }
5935         return NULL;
5936 }
5937
5938 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5939                                                 u64 parent, u64 root)
5940 {
5941         struct tree_backref *ref = malloc(sizeof(*ref));
5942
5943         if (!ref)
5944                 return NULL;
5945         memset(&ref->node, 0, sizeof(ref->node));
5946         if (parent > 0) {
5947                 ref->parent = parent;
5948                 ref->node.full_backref = 1;
5949         } else {
5950                 ref->root = root;
5951                 ref->node.full_backref = 0;
5952         }
5953         list_add_tail(&ref->node.list, &rec->backrefs);
5954
5955         return ref;
5956 }
5957
5958 static struct data_backref *find_data_backref(struct extent_record *rec,
5959                                                 u64 parent, u64 root,
5960                                                 u64 owner, u64 offset,
5961                                                 int found_ref,
5962                                                 u64 disk_bytenr, u64 bytes)
5963 {
5964         struct list_head *cur = rec->backrefs.next;
5965         struct extent_backref *node;
5966         struct data_backref *back;
5967
5968         while(cur != &rec->backrefs) {
5969                 node = to_extent_backref(cur);
5970                 cur = cur->next;
5971                 if (!node->is_data)
5972                         continue;
5973                 back = to_data_backref(node);
5974                 if (parent > 0) {
5975                         if (!node->full_backref)
5976                                 continue;
5977                         if (parent == back->parent)
5978                                 return back;
5979                 } else {
5980                         if (node->full_backref)
5981                                 continue;
5982                         if (back->root == root && back->owner == owner &&
5983                             back->offset == offset) {
5984                                 if (found_ref && node->found_ref &&
5985                                     (back->bytes != bytes ||
5986                                     back->disk_bytenr != disk_bytenr))
5987                                         continue;
5988                                 return back;
5989                         }
5990                 }
5991         }
5992         return NULL;
5993 }
5994
5995 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5996                                                 u64 parent, u64 root,
5997                                                 u64 owner, u64 offset,
5998                                                 u64 max_size)
5999 {
6000         struct data_backref *ref = malloc(sizeof(*ref));
6001
6002         if (!ref)
6003                 return NULL;
6004         memset(&ref->node, 0, sizeof(ref->node));
6005         ref->node.is_data = 1;
6006
6007         if (parent > 0) {
6008                 ref->parent = parent;
6009                 ref->owner = 0;
6010                 ref->offset = 0;
6011                 ref->node.full_backref = 1;
6012         } else {
6013                 ref->root = root;
6014                 ref->owner = owner;
6015                 ref->offset = offset;
6016                 ref->node.full_backref = 0;
6017         }
6018         ref->bytes = max_size;
6019         ref->found_ref = 0;
6020         ref->num_refs = 0;
6021         list_add_tail(&ref->node.list, &rec->backrefs);
6022         if (max_size > rec->max_size)
6023                 rec->max_size = max_size;
6024         return ref;
6025 }
6026
6027 /* Check if the type of extent matches with its chunk */
6028 static void check_extent_type(struct extent_record *rec)
6029 {
6030         struct btrfs_block_group_cache *bg_cache;
6031
6032         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6033         if (!bg_cache)
6034                 return;
6035
6036         /* data extent, check chunk directly*/
6037         if (!rec->metadata) {
6038                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6039                         rec->wrong_chunk_type = 1;
6040                 return;
6041         }
6042
6043         /* metadata extent, check the obvious case first */
6044         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6045                                  BTRFS_BLOCK_GROUP_METADATA))) {
6046                 rec->wrong_chunk_type = 1;
6047                 return;
6048         }
6049
6050         /*
6051          * Check SYSTEM extent, as it's also marked as metadata, we can only
6052          * make sure it's a SYSTEM extent by its backref
6053          */
6054         if (!list_empty(&rec->backrefs)) {
6055                 struct extent_backref *node;
6056                 struct tree_backref *tback;
6057                 u64 bg_type;
6058
6059                 node = to_extent_backref(rec->backrefs.next);
6060                 if (node->is_data) {
6061                         /* tree block shouldn't have data backref */
6062                         rec->wrong_chunk_type = 1;
6063                         return;
6064                 }
6065                 tback = container_of(node, struct tree_backref, node);
6066
6067                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6068                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6069                 else
6070                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6071                 if (!(bg_cache->flags & bg_type))
6072                         rec->wrong_chunk_type = 1;
6073         }
6074 }
6075
6076 /*
6077  * Allocate a new extent record, fill default values from @tmpl and insert int
6078  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6079  * the cache, otherwise it fails.
6080  */
6081 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6082                 struct extent_record *tmpl)
6083 {
6084         struct extent_record *rec;
6085         int ret = 0;
6086
6087         BUG_ON(tmpl->max_size == 0);
6088         rec = malloc(sizeof(*rec));
6089         if (!rec)
6090                 return -ENOMEM;
6091         rec->start = tmpl->start;
6092         rec->max_size = tmpl->max_size;
6093         rec->nr = max(tmpl->nr, tmpl->max_size);
6094         rec->found_rec = tmpl->found_rec;
6095         rec->content_checked = tmpl->content_checked;
6096         rec->owner_ref_checked = tmpl->owner_ref_checked;
6097         rec->num_duplicates = 0;
6098         rec->metadata = tmpl->metadata;
6099         rec->flag_block_full_backref = FLAG_UNSET;
6100         rec->bad_full_backref = 0;
6101         rec->crossing_stripes = 0;
6102         rec->wrong_chunk_type = 0;
6103         rec->is_root = tmpl->is_root;
6104         rec->refs = tmpl->refs;
6105         rec->extent_item_refs = tmpl->extent_item_refs;
6106         rec->parent_generation = tmpl->parent_generation;
6107         INIT_LIST_HEAD(&rec->backrefs);
6108         INIT_LIST_HEAD(&rec->dups);
6109         INIT_LIST_HEAD(&rec->list);
6110         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6111         rec->cache.start = tmpl->start;
6112         rec->cache.size = tmpl->nr;
6113         ret = insert_cache_extent(extent_cache, &rec->cache);
6114         if (ret) {
6115                 free(rec);
6116                 return ret;
6117         }
6118         bytes_used += rec->nr;
6119
6120         if (tmpl->metadata)
6121                 rec->crossing_stripes = check_crossing_stripes(global_info,
6122                                 rec->start, global_info->nodesize);
6123         check_extent_type(rec);
6124         return ret;
6125 }
6126
6127 /*
6128  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6129  * some are hints:
6130  * - refs              - if found, increase refs
6131  * - is_root           - if found, set
6132  * - content_checked   - if found, set
6133  * - owner_ref_checked - if found, set
6134  *
6135  * If not found, create a new one, initialize and insert.
6136  */
6137 static int add_extent_rec(struct cache_tree *extent_cache,
6138                 struct extent_record *tmpl)
6139 {
6140         struct extent_record *rec;
6141         struct cache_extent *cache;
6142         int ret = 0;
6143         int dup = 0;
6144
6145         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6146         if (cache) {
6147                 rec = container_of(cache, struct extent_record, cache);
6148                 if (tmpl->refs)
6149                         rec->refs++;
6150                 if (rec->nr == 1)
6151                         rec->nr = max(tmpl->nr, tmpl->max_size);
6152
6153                 /*
6154                  * We need to make sure to reset nr to whatever the extent
6155                  * record says was the real size, this way we can compare it to
6156                  * the backrefs.
6157                  */
6158                 if (tmpl->found_rec) {
6159                         if (tmpl->start != rec->start || rec->found_rec) {
6160                                 struct extent_record *tmp;
6161
6162                                 dup = 1;
6163                                 if (list_empty(&rec->list))
6164                                         list_add_tail(&rec->list,
6165                                                       &duplicate_extents);
6166
6167                                 /*
6168                                  * We have to do this song and dance in case we
6169                                  * find an extent record that falls inside of
6170                                  * our current extent record but does not have
6171                                  * the same objectid.
6172                                  */
6173                                 tmp = malloc(sizeof(*tmp));
6174                                 if (!tmp)
6175                                         return -ENOMEM;
6176                                 tmp->start = tmpl->start;
6177                                 tmp->max_size = tmpl->max_size;
6178                                 tmp->nr = tmpl->nr;
6179                                 tmp->found_rec = 1;
6180                                 tmp->metadata = tmpl->metadata;
6181                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6182                                 INIT_LIST_HEAD(&tmp->list);
6183                                 list_add_tail(&tmp->list, &rec->dups);
6184                                 rec->num_duplicates++;
6185                         } else {
6186                                 rec->nr = tmpl->nr;
6187                                 rec->found_rec = 1;
6188                         }
6189                 }
6190
6191                 if (tmpl->extent_item_refs && !dup) {
6192                         if (rec->extent_item_refs) {
6193                                 fprintf(stderr, "block %llu rec "
6194                                         "extent_item_refs %llu, passed %llu\n",
6195                                         (unsigned long long)tmpl->start,
6196                                         (unsigned long long)
6197                                                         rec->extent_item_refs,
6198                                         (unsigned long long)tmpl->extent_item_refs);
6199                         }
6200                         rec->extent_item_refs = tmpl->extent_item_refs;
6201                 }
6202                 if (tmpl->is_root)
6203                         rec->is_root = 1;
6204                 if (tmpl->content_checked)
6205                         rec->content_checked = 1;
6206                 if (tmpl->owner_ref_checked)
6207                         rec->owner_ref_checked = 1;
6208                 memcpy(&rec->parent_key, &tmpl->parent_key,
6209                                 sizeof(tmpl->parent_key));
6210                 if (tmpl->parent_generation)
6211                         rec->parent_generation = tmpl->parent_generation;
6212                 if (rec->max_size < tmpl->max_size)
6213                         rec->max_size = tmpl->max_size;
6214
6215                 /*
6216                  * A metadata extent can't cross stripe_len boundary, otherwise
6217                  * kernel scrub won't be able to handle it.
6218                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6219                  * it.
6220                  */
6221                 if (tmpl->metadata)
6222                         rec->crossing_stripes = check_crossing_stripes(
6223                                         global_info, rec->start,
6224                                         global_info->nodesize);
6225                 check_extent_type(rec);
6226                 maybe_free_extent_rec(extent_cache, rec);
6227                 return ret;
6228         }
6229
6230         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6231
6232         return ret;
6233 }
6234
6235 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6236                             u64 parent, u64 root, int found_ref)
6237 {
6238         struct extent_record *rec;
6239         struct tree_backref *back;
6240         struct cache_extent *cache;
6241         int ret;
6242
6243         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6244         if (!cache) {
6245                 struct extent_record tmpl;
6246
6247                 memset(&tmpl, 0, sizeof(tmpl));
6248                 tmpl.start = bytenr;
6249                 tmpl.nr = 1;
6250                 tmpl.metadata = 1;
6251                 tmpl.max_size = 1;
6252
6253                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6254                 if (ret)
6255                         return ret;
6256
6257                 /* really a bug in cache_extent implement now */
6258                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6259                 if (!cache)
6260                         return -ENOENT;
6261         }
6262
6263         rec = container_of(cache, struct extent_record, cache);
6264         if (rec->start != bytenr) {
6265                 /*
6266                  * Several cause, from unaligned bytenr to over lapping extents
6267                  */
6268                 return -EEXIST;
6269         }
6270
6271         back = find_tree_backref(rec, parent, root);
6272         if (!back) {
6273                 back = alloc_tree_backref(rec, parent, root);
6274                 if (!back)
6275                         return -ENOMEM;
6276         }
6277
6278         if (found_ref) {
6279                 if (back->node.found_ref) {
6280                         fprintf(stderr, "Extent back ref already exists "
6281                                 "for %llu parent %llu root %llu \n",
6282                                 (unsigned long long)bytenr,
6283                                 (unsigned long long)parent,
6284                                 (unsigned long long)root);
6285                 }
6286                 back->node.found_ref = 1;
6287         } else {
6288                 if (back->node.found_extent_tree) {
6289                         fprintf(stderr, "Extent back ref already exists "
6290                                 "for %llu parent %llu root %llu \n",
6291                                 (unsigned long long)bytenr,
6292                                 (unsigned long long)parent,
6293                                 (unsigned long long)root);
6294                 }
6295                 back->node.found_extent_tree = 1;
6296         }
6297         check_extent_type(rec);
6298         maybe_free_extent_rec(extent_cache, rec);
6299         return 0;
6300 }
6301
6302 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6303                             u64 parent, u64 root, u64 owner, u64 offset,
6304                             u32 num_refs, int found_ref, u64 max_size)
6305 {
6306         struct extent_record *rec;
6307         struct data_backref *back;
6308         struct cache_extent *cache;
6309         int ret;
6310
6311         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6312         if (!cache) {
6313                 struct extent_record tmpl;
6314
6315                 memset(&tmpl, 0, sizeof(tmpl));
6316                 tmpl.start = bytenr;
6317                 tmpl.nr = 1;
6318                 tmpl.max_size = max_size;
6319
6320                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6321                 if (ret)
6322                         return ret;
6323
6324                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6325                 if (!cache)
6326                         abort();
6327         }
6328
6329         rec = container_of(cache, struct extent_record, cache);
6330         if (rec->max_size < max_size)
6331                 rec->max_size = max_size;
6332
6333         /*
6334          * If found_ref is set then max_size is the real size and must match the
6335          * existing refs.  So if we have already found a ref then we need to
6336          * make sure that this ref matches the existing one, otherwise we need
6337          * to add a new backref so we can notice that the backrefs don't match
6338          * and we need to figure out who is telling the truth.  This is to
6339          * account for that awful fsync bug I introduced where we'd end up with
6340          * a btrfs_file_extent_item that would have its length include multiple
6341          * prealloc extents or point inside of a prealloc extent.
6342          */
6343         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6344                                  bytenr, max_size);
6345         if (!back) {
6346                 back = alloc_data_backref(rec, parent, root, owner, offset,
6347                                           max_size);
6348                 BUG_ON(!back);
6349         }
6350
6351         if (found_ref) {
6352                 BUG_ON(num_refs != 1);
6353                 if (back->node.found_ref)
6354                         BUG_ON(back->bytes != max_size);
6355                 back->node.found_ref = 1;
6356                 back->found_ref += 1;
6357                 back->bytes = max_size;
6358                 back->disk_bytenr = bytenr;
6359                 rec->refs += 1;
6360                 rec->content_checked = 1;
6361                 rec->owner_ref_checked = 1;
6362         } else {
6363                 if (back->node.found_extent_tree) {
6364                         fprintf(stderr, "Extent back ref already exists "
6365                                 "for %llu parent %llu root %llu "
6366                                 "owner %llu offset %llu num_refs %lu\n",
6367                                 (unsigned long long)bytenr,
6368                                 (unsigned long long)parent,
6369                                 (unsigned long long)root,
6370                                 (unsigned long long)owner,
6371                                 (unsigned long long)offset,
6372                                 (unsigned long)num_refs);
6373                 }
6374                 back->num_refs = num_refs;
6375                 back->node.found_extent_tree = 1;
6376         }
6377         maybe_free_extent_rec(extent_cache, rec);
6378         return 0;
6379 }
6380
6381 static int add_pending(struct cache_tree *pending,
6382                        struct cache_tree *seen, u64 bytenr, u32 size)
6383 {
6384         int ret;
6385         ret = add_cache_extent(seen, bytenr, size);
6386         if (ret)
6387                 return ret;
6388         add_cache_extent(pending, bytenr, size);
6389         return 0;
6390 }
6391
6392 static int pick_next_pending(struct cache_tree *pending,
6393                         struct cache_tree *reada,
6394                         struct cache_tree *nodes,
6395                         u64 last, struct block_info *bits, int bits_nr,
6396                         int *reada_bits)
6397 {
6398         unsigned long node_start = last;
6399         struct cache_extent *cache;
6400         int ret;
6401
6402         cache = search_cache_extent(reada, 0);
6403         if (cache) {
6404                 bits[0].start = cache->start;
6405                 bits[0].size = cache->size;
6406                 *reada_bits = 1;
6407                 return 1;
6408         }
6409         *reada_bits = 0;
6410         if (node_start > 32768)
6411                 node_start -= 32768;
6412
6413         cache = search_cache_extent(nodes, node_start);
6414         if (!cache)
6415                 cache = search_cache_extent(nodes, 0);
6416
6417         if (!cache) {
6418                  cache = search_cache_extent(pending, 0);
6419                  if (!cache)
6420                          return 0;
6421                  ret = 0;
6422                  do {
6423                          bits[ret].start = cache->start;
6424                          bits[ret].size = cache->size;
6425                          cache = next_cache_extent(cache);
6426                          ret++;
6427                  } while (cache && ret < bits_nr);
6428                  return ret;
6429         }
6430
6431         ret = 0;
6432         do {
6433                 bits[ret].start = cache->start;
6434                 bits[ret].size = cache->size;
6435                 cache = next_cache_extent(cache);
6436                 ret++;
6437         } while (cache && ret < bits_nr);
6438
6439         if (bits_nr - ret > 8) {
6440                 u64 lookup = bits[0].start + bits[0].size;
6441                 struct cache_extent *next;
6442                 next = search_cache_extent(pending, lookup);
6443                 while(next) {
6444                         if (next->start - lookup > 32768)
6445                                 break;
6446                         bits[ret].start = next->start;
6447                         bits[ret].size = next->size;
6448                         lookup = next->start + next->size;
6449                         ret++;
6450                         if (ret == bits_nr)
6451                                 break;
6452                         next = next_cache_extent(next);
6453                         if (!next)
6454                                 break;
6455                 }
6456         }
6457         return ret;
6458 }
6459
6460 static void free_chunk_record(struct cache_extent *cache)
6461 {
6462         struct chunk_record *rec;
6463
6464         rec = container_of(cache, struct chunk_record, cache);
6465         list_del_init(&rec->list);
6466         list_del_init(&rec->dextents);
6467         free(rec);
6468 }
6469
/*
 * Tear down @chunk_cache through cache_tree_free_extents(), passing
 * free_chunk_record() as the release callback (the callback contract is
 * defined by cache_tree_free_extents(), outside this part of the file).
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
6474
6475 static void free_device_record(struct rb_node *node)
6476 {
6477         struct device_record *rec;
6478
6479         rec = container_of(node, struct device_record, node);
6480         free(rec);
6481 }
6482
6483 FREE_RB_BASED_TREE(device_cache, free_device_record);
6484
6485 int insert_block_group_record(struct block_group_tree *tree,
6486                               struct block_group_record *bg_rec)
6487 {
6488         int ret;
6489
6490         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6491         if (ret)
6492                 return ret;
6493
6494         list_add_tail(&bg_rec->list, &tree->block_groups);
6495         return 0;
6496 }
6497
6498 static void free_block_group_record(struct cache_extent *cache)
6499 {
6500         struct block_group_record *rec;
6501
6502         rec = container_of(cache, struct block_group_record, cache);
6503         list_del_init(&rec->list);
6504         free(rec);
6505 }
6506
/*
 * Tear down the extent cache of @tree through cache_tree_free_extents(),
 * passing free_block_group_record() as the release callback (the callback
 * contract is defined by cache_tree_free_extents(), outside this part of
 * the file).
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
6511
6512 int insert_device_extent_record(struct device_extent_tree *tree,
6513                                 struct device_extent_record *de_rec)
6514 {
6515         int ret;
6516
6517         /*
6518          * Device extent is a bit different from the other extents, because
6519          * the extents which belong to the different devices may have the
6520          * same start and size, so we need use the special extent cache
6521          * search/insert functions.
6522          */
6523         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6524         if (ret)
6525                 return ret;
6526
6527         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6528         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6529         return 0;
6530 }
6531
6532 static void free_device_extent_record(struct cache_extent *cache)
6533 {
6534         struct device_extent_record *rec;
6535
6536         rec = container_of(cache, struct device_extent_record, cache);
6537         if (!list_empty(&rec->chunk_list))
6538                 list_del_init(&rec->chunk_list);
6539         if (!list_empty(&rec->device_list))
6540                 list_del_init(&rec->device_list);
6541         free(rec);
6542 }
6543
/*
 * Tear down the extent cache of @tree through cache_tree_free_extents(),
 * passing free_device_extent_record() as the release callback (the callback
 * contract is defined by cache_tree_free_extents(), outside this part of
 * the file).
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
6548
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the item's ref
 * count.  In both cases key.objectid is the extent bytenr and key.offset the
 * parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
6569
6570 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6571                                             struct btrfs_key *key,
6572                                             int slot)
6573 {
6574         struct btrfs_chunk *ptr;
6575         struct chunk_record *rec;
6576         int num_stripes, i;
6577
6578         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6579         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6580
6581         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6582         if (!rec) {
6583                 fprintf(stderr, "memory allocation failed\n");
6584                 exit(-1);
6585         }
6586
6587         INIT_LIST_HEAD(&rec->list);
6588         INIT_LIST_HEAD(&rec->dextents);
6589         rec->bg_rec = NULL;
6590
6591         rec->cache.start = key->offset;
6592         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6593
6594         rec->generation = btrfs_header_generation(leaf);
6595
6596         rec->objectid = key->objectid;
6597         rec->type = key->type;
6598         rec->offset = key->offset;
6599
6600         rec->length = rec->cache.size;
6601         rec->owner = btrfs_chunk_owner(leaf, ptr);
6602         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6603         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6604         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6605         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6606         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6607         rec->num_stripes = num_stripes;
6608         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6609
6610         for (i = 0; i < rec->num_stripes; ++i) {
6611                 rec->stripes[i].devid =
6612                         btrfs_stripe_devid_nr(leaf, ptr, i);
6613                 rec->stripes[i].offset =
6614                         btrfs_stripe_offset_nr(leaf, ptr, i);
6615                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6616                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6617                                 BTRFS_UUID_SIZE);
6618         }
6619
6620         return rec;
6621 }
6622
6623 static int process_chunk_item(struct cache_tree *chunk_cache,
6624                               struct btrfs_key *key, struct extent_buffer *eb,
6625                               int slot)
6626 {
6627         struct chunk_record *rec;
6628         struct btrfs_chunk *chunk;
6629         int ret = 0;
6630
6631         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6632         /*
6633          * Do extra check for this chunk item,
6634          *
6635          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6636          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6637          * and owner<->key_type check.
6638          */
6639         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6640                                       key->offset);
6641         if (ret < 0) {
6642                 error("chunk(%llu, %llu) is not valid, ignore it",
6643                       key->offset, btrfs_chunk_length(eb, chunk));
6644                 return 0;
6645         }
6646         rec = btrfs_new_chunk_record(eb, key, slot);
6647         ret = insert_cache_extent(chunk_cache, &rec->cache);
6648         if (ret) {
6649                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6650                         rec->offset, rec->length);
6651                 free(rec);
6652         }
6653
6654         return ret;
6655 }
6656
6657 static int process_device_item(struct rb_root *dev_cache,
6658                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6659 {
6660         struct btrfs_dev_item *ptr;
6661         struct device_record *rec;
6662         int ret = 0;
6663
6664         ptr = btrfs_item_ptr(eb,
6665                 slot, struct btrfs_dev_item);
6666
6667         rec = malloc(sizeof(*rec));
6668         if (!rec) {
6669                 fprintf(stderr, "memory allocation failed\n");
6670                 return -ENOMEM;
6671         }
6672
6673         rec->devid = key->offset;
6674         rec->generation = btrfs_header_generation(eb);
6675
6676         rec->objectid = key->objectid;
6677         rec->type = key->type;
6678         rec->offset = key->offset;
6679
6680         rec->devid = btrfs_device_id(eb, ptr);
6681         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6682         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6683
6684         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6685         if (ret) {
6686                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6687                 free(rec);
6688         }
6689
6690         return ret;
6691 }
6692
6693 struct block_group_record *
6694 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6695                              int slot)
6696 {
6697         struct btrfs_block_group_item *ptr;
6698         struct block_group_record *rec;
6699
6700         rec = calloc(1, sizeof(*rec));
6701         if (!rec) {
6702                 fprintf(stderr, "memory allocation failed\n");
6703                 exit(-1);
6704         }
6705
6706         rec->cache.start = key->objectid;
6707         rec->cache.size = key->offset;
6708
6709         rec->generation = btrfs_header_generation(leaf);
6710
6711         rec->objectid = key->objectid;
6712         rec->type = key->type;
6713         rec->offset = key->offset;
6714
6715         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6716         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6717
6718         INIT_LIST_HEAD(&rec->list);
6719
6720         return rec;
6721 }
6722
6723 static int process_block_group_item(struct block_group_tree *block_group_cache,
6724                                     struct btrfs_key *key,
6725                                     struct extent_buffer *eb, int slot)
6726 {
6727         struct block_group_record *rec;
6728         int ret = 0;
6729
6730         rec = btrfs_new_block_group_record(eb, key, slot);
6731         ret = insert_block_group_record(block_group_cache, rec);
6732         if (ret) {
6733                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6734                         rec->objectid, rec->offset);
6735                 free(rec);
6736         }
6737
6738         return ret;
6739 }
6740
6741 struct device_extent_record *
6742 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6743                                struct btrfs_key *key, int slot)
6744 {
6745         struct device_extent_record *rec;
6746         struct btrfs_dev_extent *ptr;
6747
6748         rec = calloc(1, sizeof(*rec));
6749         if (!rec) {
6750                 fprintf(stderr, "memory allocation failed\n");
6751                 exit(-1);
6752         }
6753
6754         rec->cache.objectid = key->objectid;
6755         rec->cache.start = key->offset;
6756
6757         rec->generation = btrfs_header_generation(leaf);
6758
6759         rec->objectid = key->objectid;
6760         rec->type = key->type;
6761         rec->offset = key->offset;
6762
6763         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6764         rec->chunk_objecteid =
6765                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6766         rec->chunk_offset =
6767                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6768         rec->length = btrfs_dev_extent_length(leaf, ptr);
6769         rec->cache.size = rec->length;
6770
6771         INIT_LIST_HEAD(&rec->chunk_list);
6772         INIT_LIST_HEAD(&rec->device_list);
6773
6774         return rec;
6775 }
6776
6777 static int
6778 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6779                            struct btrfs_key *key, struct extent_buffer *eb,
6780                            int slot)
6781 {
6782         struct device_extent_record *rec;
6783         int ret;
6784
6785         rec = btrfs_new_device_extent_record(eb, key, slot);
6786         ret = insert_device_extent_record(dev_extent_cache, rec);
6787         if (ret) {
6788                 fprintf(stderr,
6789                         "Device extent[%llu, %llu, %llu] existed.\n",
6790                         rec->objectid, rec->offset, rec->length);
6791                 free(rec);
6792         }
6793
6794         return ret;
6795 }
6796
6797 static int process_extent_item(struct btrfs_root *root,
6798                                struct cache_tree *extent_cache,
6799                                struct extent_buffer *eb, int slot)
6800 {
6801         struct btrfs_extent_item *ei;
6802         struct btrfs_extent_inline_ref *iref;
6803         struct btrfs_extent_data_ref *dref;
6804         struct btrfs_shared_data_ref *sref;
6805         struct btrfs_key key;
6806         struct extent_record tmpl;
6807         unsigned long end;
6808         unsigned long ptr;
6809         int ret;
6810         int type;
6811         u32 item_size = btrfs_item_size_nr(eb, slot);
6812         u64 refs = 0;
6813         u64 offset;
6814         u64 num_bytes;
6815         int metadata = 0;
6816
6817         btrfs_item_key_to_cpu(eb, &key, slot);
6818
6819         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6820                 metadata = 1;
6821                 num_bytes = root->fs_info->nodesize;
6822         } else {
6823                 num_bytes = key.offset;
6824         }
6825
6826         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6827                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6828                       key.objectid, root->fs_info->sectorsize);
6829                 return -EIO;
6830         }
6831         if (item_size < sizeof(*ei)) {
6832 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6833                 struct btrfs_extent_item_v0 *ei0;
6834                 BUG_ON(item_size != sizeof(*ei0));
6835                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6836                 refs = btrfs_extent_refs_v0(eb, ei0);
6837 #else
6838                 BUG();
6839 #endif
6840                 memset(&tmpl, 0, sizeof(tmpl));
6841                 tmpl.start = key.objectid;
6842                 tmpl.nr = num_bytes;
6843                 tmpl.extent_item_refs = refs;
6844                 tmpl.metadata = metadata;
6845                 tmpl.found_rec = 1;
6846                 tmpl.max_size = num_bytes;
6847
6848                 return add_extent_rec(extent_cache, &tmpl);
6849         }
6850
6851         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6852         refs = btrfs_extent_refs(eb, ei);
6853         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6854                 metadata = 1;
6855         else
6856                 metadata = 0;
6857         if (metadata && num_bytes != root->fs_info->nodesize) {
6858                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6859                       num_bytes, root->fs_info->nodesize);
6860                 return -EIO;
6861         }
6862         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6863                 error("ignore invalid data extent, length %llu is not aligned to %u",
6864                       num_bytes, root->fs_info->sectorsize);
6865                 return -EIO;
6866         }
6867
6868         memset(&tmpl, 0, sizeof(tmpl));
6869         tmpl.start = key.objectid;
6870         tmpl.nr = num_bytes;
6871         tmpl.extent_item_refs = refs;
6872         tmpl.metadata = metadata;
6873         tmpl.found_rec = 1;
6874         tmpl.max_size = num_bytes;
6875         add_extent_rec(extent_cache, &tmpl);
6876
6877         ptr = (unsigned long)(ei + 1);
6878         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6879             key.type == BTRFS_EXTENT_ITEM_KEY)
6880                 ptr += sizeof(struct btrfs_tree_block_info);
6881
6882         end = (unsigned long)ei + item_size;
6883         while (ptr < end) {
6884                 iref = (struct btrfs_extent_inline_ref *)ptr;
6885                 type = btrfs_extent_inline_ref_type(eb, iref);
6886                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6887                 switch (type) {
6888                 case BTRFS_TREE_BLOCK_REF_KEY:
6889                         ret = add_tree_backref(extent_cache, key.objectid,
6890                                         0, offset, 0);
6891                         if (ret < 0)
6892                                 error(
6893                         "add_tree_backref failed (extent items tree block): %s",
6894                                       strerror(-ret));
6895                         break;
6896                 case BTRFS_SHARED_BLOCK_REF_KEY:
6897                         ret = add_tree_backref(extent_cache, key.objectid,
6898                                         offset, 0, 0);
6899                         if (ret < 0)
6900                                 error(
6901                         "add_tree_backref failed (extent items shared block): %s",
6902                                       strerror(-ret));
6903                         break;
6904                 case BTRFS_EXTENT_DATA_REF_KEY:
6905                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6906                         add_data_backref(extent_cache, key.objectid, 0,
6907                                         btrfs_extent_data_ref_root(eb, dref),
6908                                         btrfs_extent_data_ref_objectid(eb,
6909                                                                        dref),
6910                                         btrfs_extent_data_ref_offset(eb, dref),
6911                                         btrfs_extent_data_ref_count(eb, dref),
6912                                         0, num_bytes);
6913                         break;
6914                 case BTRFS_SHARED_DATA_REF_KEY:
6915                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6916                         add_data_backref(extent_cache, key.objectid, offset,
6917                                         0, 0, 0,
6918                                         btrfs_shared_data_ref_count(eb, sref),
6919                                         0, num_bytes);
6920                         break;
6921                 default:
6922                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6923                                 key.objectid, key.type, num_bytes);
6924                         goto out;
6925                 }
6926                 ptr += btrfs_extent_inline_ref_size(type);
6927         }
6928         WARN_ON(ptr > end);
6929 out:
6930         return 0;
6931 }
6932
6933 static int check_cache_range(struct btrfs_root *root,
6934                              struct btrfs_block_group_cache *cache,
6935                              u64 offset, u64 bytes)
6936 {
6937         struct btrfs_free_space *entry;
6938         u64 *logical;
6939         u64 bytenr;
6940         int stripe_len;
6941         int i, nr, ret;
6942
6943         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6944                 bytenr = btrfs_sb_offset(i);
6945                 ret = btrfs_rmap_block(root->fs_info,
6946                                        cache->key.objectid, bytenr, 0,
6947                                        &logical, &nr, &stripe_len);
6948                 if (ret)
6949                         return ret;
6950
6951                 while (nr--) {
6952                         if (logical[nr] + stripe_len <= offset)
6953                                 continue;
6954                         if (offset + bytes <= logical[nr])
6955                                 continue;
6956                         if (logical[nr] == offset) {
6957                                 if (stripe_len >= bytes) {
6958                                         free(logical);
6959                                         return 0;
6960                                 }
6961                                 bytes -= stripe_len;
6962                                 offset += stripe_len;
6963                         } else if (logical[nr] < offset) {
6964                                 if (logical[nr] + stripe_len >=
6965                                     offset + bytes) {
6966                                         free(logical);
6967                                         return 0;
6968                                 }
6969                                 bytes = (offset + bytes) -
6970                                         (logical[nr] + stripe_len);
6971                                 offset = logical[nr] + stripe_len;
6972                         } else {
6973                                 /*
6974                                  * Could be tricky, the super may land in the
6975                                  * middle of the area we're checking.  First
6976                                  * check the easiest case, it's at the end.
6977                                  */
6978                                 if (logical[nr] + stripe_len >=
6979                                     bytes + offset) {
6980                                         bytes = logical[nr] - offset;
6981                                         continue;
6982                                 }
6983
6984                                 /* Check the left side */
6985                                 ret = check_cache_range(root, cache,
6986                                                         offset,
6987                                                         logical[nr] - offset);
6988                                 if (ret) {
6989                                         free(logical);
6990                                         return ret;
6991                                 }
6992
6993                                 /* Now we continue with the right side */
6994                                 bytes = (offset + bytes) -
6995                                         (logical[nr] + stripe_len);
6996                                 offset = logical[nr] + stripe_len;
6997                         }
6998                 }
6999
7000                 free(logical);
7001         }
7002
7003         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7004         if (!entry) {
7005                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7006                         offset, offset+bytes);
7007                 return -EINVAL;
7008         }
7009
7010         if (entry->offset != offset) {
7011                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7012                         entry->offset);
7013                 return -EINVAL;
7014         }
7015
7016         if (entry->bytes != bytes) {
7017                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7018                         bytes, entry->bytes, offset);
7019                 return -EINVAL;
7020         }
7021
7022         unlink_free_space(cache->free_space_ctl, entry);
7023         free(entry);
7024         return 0;
7025 }
7026
7027 static int verify_space_cache(struct btrfs_root *root,
7028                               struct btrfs_block_group_cache *cache)
7029 {
7030         struct btrfs_path path;
7031         struct extent_buffer *leaf;
7032         struct btrfs_key key;
7033         u64 last;
7034         int ret = 0;
7035
7036         root = root->fs_info->extent_root;
7037
7038         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7039
7040         btrfs_init_path(&path);
7041         key.objectid = last;
7042         key.offset = 0;
7043         key.type = BTRFS_EXTENT_ITEM_KEY;
7044         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7045         if (ret < 0)
7046                 goto out;
7047         ret = 0;
7048         while (1) {
7049                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7050                         ret = btrfs_next_leaf(root, &path);
7051                         if (ret < 0)
7052                                 goto out;
7053                         if (ret > 0) {
7054                                 ret = 0;
7055                                 break;
7056                         }
7057                 }
7058                 leaf = path.nodes[0];
7059                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7060                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7061                         break;
7062                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7063                     key.type != BTRFS_METADATA_ITEM_KEY) {
7064                         path.slots[0]++;
7065                         continue;
7066                 }
7067
7068                 if (last == key.objectid) {
7069                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7070                                 last = key.objectid + key.offset;
7071                         else
7072                                 last = key.objectid + root->fs_info->nodesize;
7073                         path.slots[0]++;
7074                         continue;
7075                 }
7076
7077                 ret = check_cache_range(root, cache, last,
7078                                         key.objectid - last);
7079                 if (ret)
7080                         break;
7081                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7082                         last = key.objectid + key.offset;
7083                 else
7084                         last = key.objectid + root->fs_info->nodesize;
7085                 path.slots[0]++;
7086         }
7087
7088         if (last < cache->key.objectid + cache->key.offset)
7089                 ret = check_cache_range(root, cache, last,
7090                                         cache->key.objectid +
7091                                         cache->key.offset - last);
7092
7093 out:
7094         btrfs_release_path(&path);
7095
7096         if (!ret &&
7097             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7098                 fprintf(stderr, "There are still entries left in the space "
7099                         "cache\n");
7100                 ret = -EINVAL;
7101         }
7102
7103         return ret;
7104 }
7105
7106 static int check_space_cache(struct btrfs_root *root)
7107 {
7108         struct btrfs_block_group_cache *cache;
7109         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7110         int ret;
7111         int error = 0;
7112
7113         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7114             btrfs_super_generation(root->fs_info->super_copy) !=
7115             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7116                 printf("cache and super generation don't match, space cache "
7117                        "will be invalidated\n");
7118                 return 0;
7119         }
7120
7121         if (ctx.progress_enabled) {
7122                 ctx.tp = TASK_FREE_SPACE;
7123                 task_start(ctx.info);
7124         }
7125
7126         while (1) {
7127                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7128                 if (!cache)
7129                         break;
7130
7131                 start = cache->key.objectid + cache->key.offset;
7132                 if (!cache->free_space_ctl) {
7133                         if (btrfs_init_free_space_ctl(cache,
7134                                                 root->fs_info->sectorsize)) {
7135                                 ret = -ENOMEM;
7136                                 break;
7137                         }
7138                 } else {
7139                         btrfs_remove_free_space_cache(cache);
7140                 }
7141
7142                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7143                         ret = exclude_super_stripes(root, cache);
7144                         if (ret) {
7145                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7146                                         strerror(-ret));
7147                                 error++;
7148                                 continue;
7149                         }
7150                         ret = load_free_space_tree(root->fs_info, cache);
7151                         free_excluded_extents(root, cache);
7152                         if (ret < 0) {
7153                                 fprintf(stderr, "could not load free space tree: %s\n",
7154                                         strerror(-ret));
7155                                 error++;
7156                                 continue;
7157                         }
7158                         error += ret;
7159                 } else {
7160                         ret = load_free_space_cache(root->fs_info, cache);
7161                         if (!ret)
7162                                 continue;
7163                 }
7164
7165                 ret = verify_space_cache(root, cache);
7166                 if (ret) {
7167                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7168                                 cache->key.objectid);
7169                         error++;
7170                 }
7171         }
7172
7173         task_stop(ctx.info);
7174
7175         return error ? -EINVAL : 0;
7176 }
7177
7178 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7179                         u64 num_bytes, unsigned long leaf_offset,
7180                         struct extent_buffer *eb) {
7181
7182         struct btrfs_fs_info *fs_info = root->fs_info;
7183         u64 offset = 0;
7184         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7185         char *data;
7186         unsigned long csum_offset;
7187         u32 csum;
7188         u32 csum_expected;
7189         u64 read_len;
7190         u64 data_checked = 0;
7191         u64 tmp;
7192         int ret = 0;
7193         int mirror;
7194         int num_copies;
7195
7196         if (num_bytes % fs_info->sectorsize)
7197                 return -EINVAL;
7198
7199         data = malloc(num_bytes);
7200         if (!data)
7201                 return -ENOMEM;
7202
7203         while (offset < num_bytes) {
7204                 mirror = 0;
7205 again:
7206                 read_len = num_bytes - offset;
7207                 /* read as much space once a time */
7208                 ret = read_extent_data(fs_info, data + offset,
7209                                 bytenr + offset, &read_len, mirror);
7210                 if (ret)
7211                         goto out;
7212                 data_checked = 0;
7213                 /* verify every 4k data's checksum */
7214                 while (data_checked < read_len) {
7215                         csum = ~(u32)0;
7216                         tmp = offset + data_checked;
7217
7218                         csum = btrfs_csum_data((char *)data + tmp,
7219                                                csum, fs_info->sectorsize);
7220                         btrfs_csum_final(csum, (u8 *)&csum);
7221
7222                         csum_offset = leaf_offset +
7223                                  tmp / fs_info->sectorsize * csum_size;
7224                         read_extent_buffer(eb, (char *)&csum_expected,
7225                                            csum_offset, csum_size);
7226                         /* try another mirror */
7227                         if (csum != csum_expected) {
7228                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7229                                                 mirror, bytenr + tmp,
7230                                                 csum, csum_expected);
7231                                 num_copies = btrfs_num_copies(root->fs_info,
7232                                                 bytenr, num_bytes);
7233                                 if (mirror < num_copies - 1) {
7234                                         mirror += 1;
7235                                         goto again;
7236                                 }
7237                         }
7238                         data_checked += fs_info->sectorsize;
7239                 }
7240                 offset += read_len;
7241         }
7242 out:
7243         free(data);
7244         return ret;
7245 }
7246
7247 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7248                                u64 num_bytes)
7249 {
7250         struct btrfs_path path;
7251         struct extent_buffer *leaf;
7252         struct btrfs_key key;
7253         int ret;
7254
7255         btrfs_init_path(&path);
7256         key.objectid = bytenr;
7257         key.type = BTRFS_EXTENT_ITEM_KEY;
7258         key.offset = (u64)-1;
7259
7260 again:
7261         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7262                                 0, 0);
7263         if (ret < 0) {
7264                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7265                 btrfs_release_path(&path);
7266                 return ret;
7267         } else if (ret) {
7268                 if (path.slots[0] > 0) {
7269                         path.slots[0]--;
7270                 } else {
7271                         ret = btrfs_prev_leaf(root, &path);
7272                         if (ret < 0) {
7273                                 goto out;
7274                         } else if (ret > 0) {
7275                                 ret = 0;
7276                                 goto out;
7277                         }
7278                 }
7279         }
7280
7281         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7282
7283         /*
7284          * Block group items come before extent items if they have the same
7285          * bytenr, so walk back one more just in case.  Dear future traveller,
7286          * first congrats on mastering time travel.  Now if it's not too much
7287          * trouble could you go back to 2006 and tell Chris to make the
7288          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7289          * EXTENT_ITEM_KEY please?
7290          */
7291         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7292                 if (path.slots[0] > 0) {
7293                         path.slots[0]--;
7294                 } else {
7295                         ret = btrfs_prev_leaf(root, &path);
7296                         if (ret < 0) {
7297                                 goto out;
7298                         } else if (ret > 0) {
7299                                 ret = 0;
7300                                 goto out;
7301                         }
7302                 }
7303                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7304         }
7305
7306         while (num_bytes) {
7307                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7308                         ret = btrfs_next_leaf(root, &path);
7309                         if (ret < 0) {
7310                                 fprintf(stderr, "Error going to next leaf "
7311                                         "%d\n", ret);
7312                                 btrfs_release_path(&path);
7313                                 return ret;
7314                         } else if (ret) {
7315                                 break;
7316                         }
7317                 }
7318                 leaf = path.nodes[0];
7319                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7320                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7321                         path.slots[0]++;
7322                         continue;
7323                 }
7324                 if (key.objectid + key.offset < bytenr) {
7325                         path.slots[0]++;
7326                         continue;
7327                 }
7328                 if (key.objectid > bytenr + num_bytes)
7329                         break;
7330
7331                 if (key.objectid == bytenr) {
7332                         if (key.offset >= num_bytes) {
7333                                 num_bytes = 0;
7334                                 break;
7335                         }
7336                         num_bytes -= key.offset;
7337                         bytenr += key.offset;
7338                 } else if (key.objectid < bytenr) {
7339                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7340                                 num_bytes = 0;
7341                                 break;
7342                         }
7343                         num_bytes = (bytenr + num_bytes) -
7344                                 (key.objectid + key.offset);
7345                         bytenr = key.objectid + key.offset;
7346                 } else {
7347                         if (key.objectid + key.offset < bytenr + num_bytes) {
7348                                 u64 new_start = key.objectid + key.offset;
7349                                 u64 new_bytes = bytenr + num_bytes - new_start;
7350
7351                                 /*
7352                                  * Weird case, the extent is in the middle of
7353                                  * our range, we'll have to search one side
7354                                  * and then the other.  Not sure if this happens
7355                                  * in real life, but no harm in coding it up
7356                                  * anyway just in case.
7357                                  */
7358                                 btrfs_release_path(&path);
7359                                 ret = check_extent_exists(root, new_start,
7360                                                           new_bytes);
7361                                 if (ret) {
7362                                         fprintf(stderr, "Right section didn't "
7363                                                 "have a record\n");
7364                                         break;
7365                                 }
7366                                 num_bytes = key.objectid - bytenr;
7367                                 goto again;
7368                         }
7369                         num_bytes = key.objectid - bytenr;
7370                 }
7371                 path.slots[0]++;
7372         }
7373         ret = 0;
7374
7375 out:
7376         if (num_bytes && !ret) {
7377                 fprintf(stderr, "There are no extents for csum range "
7378                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7379                 ret = 1;
7380         }
7381
7382         btrfs_release_path(&path);
7383         return ret;
7384 }
7385
7386 static int check_csums(struct btrfs_root *root)
7387 {
7388         struct btrfs_path path;
7389         struct extent_buffer *leaf;
7390         struct btrfs_key key;
7391         u64 offset = 0, num_bytes = 0;
7392         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7393         int errors = 0;
7394         int ret;
7395         u64 data_len;
7396         unsigned long leaf_offset;
7397
7398         root = root->fs_info->csum_root;
7399         if (!extent_buffer_uptodate(root->node)) {
7400                 fprintf(stderr, "No valid csum tree found\n");
7401                 return -ENOENT;
7402         }
7403
7404         btrfs_init_path(&path);
7405         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7406         key.type = BTRFS_EXTENT_CSUM_KEY;
7407         key.offset = 0;
7408         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7409         if (ret < 0) {
7410                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7411                 btrfs_release_path(&path);
7412                 return ret;
7413         }
7414
7415         if (ret > 0 && path.slots[0])
7416                 path.slots[0]--;
7417         ret = 0;
7418
7419         while (1) {
7420                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7421                         ret = btrfs_next_leaf(root, &path);
7422                         if (ret < 0) {
7423                                 fprintf(stderr, "Error going to next leaf "
7424                                         "%d\n", ret);
7425                                 break;
7426                         }
7427                         if (ret)
7428                                 break;
7429                 }
7430                 leaf = path.nodes[0];
7431
7432                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7433                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7434                         path.slots[0]++;
7435                         continue;
7436                 }
7437
7438                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7439                               csum_size) * root->fs_info->sectorsize;
7440                 if (!check_data_csum)
7441                         goto skip_csum_check;
7442                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7443                 ret = check_extent_csums(root, key.offset, data_len,
7444                                          leaf_offset, leaf);
7445                 if (ret)
7446                         break;
7447 skip_csum_check:
7448                 if (!num_bytes) {
7449                         offset = key.offset;
7450                 } else if (key.offset != offset + num_bytes) {
7451                         ret = check_extent_exists(root, offset, num_bytes);
7452                         if (ret) {
7453                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7454                                         "there is no extent record\n",
7455                                         offset, offset+num_bytes);
7456                                 errors++;
7457                         }
7458                         offset = key.offset;
7459                         num_bytes = 0;
7460                 }
7461                 num_bytes += data_len;
7462                 path.slots[0]++;
7463         }
7464
7465         btrfs_release_path(&path);
7466         return errors;
7467 }
7468
7469 static int is_dropped_key(struct btrfs_key *key,
7470                           struct btrfs_key *drop_key) {
7471         if (key->objectid < drop_key->objectid)
7472                 return 1;
7473         else if (key->objectid == drop_key->objectid) {
7474                 if (key->type < drop_key->type)
7475                         return 1;
7476                 else if (key->type == drop_key->type) {
7477                         if (key->offset < drop_key->offset)
7478                                 return 1;
7479                 }
7480         }
7481         return 0;
7482 }
7483
7484 /*
7485  * Here are the rules for FULL_BACKREF.
7486  *
7487  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7488  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7489  *      FULL_BACKREF set.
7490  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7491  *    if it happened after the relocation occurred since we'll have dropped the
7492  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7493  *    have no real way to know for sure.
7494  *
7495  * We process the blocks one root at a time, and we start from the lowest root
7496  * objectid and go to the highest.  So we can just lookup the owner backref for
7497  * the record and if we don't find it then we know it doesn't exist and we have
7498  * a FULL BACKREF.
7499  *
7500  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7501  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7502  * be set or not and then we can check later once we've gathered all the refs.
7503  */
7504 static int calc_extent_flag(struct cache_tree *extent_cache,
7505                            struct extent_buffer *buf,
7506                            struct root_item_record *ri,
7507                            u64 *flags)
7508 {
7509         struct extent_record *rec;
7510         struct cache_extent *cache;
7511         struct tree_backref *tback;
7512         u64 owner = 0;
7513
7514         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7515         /* we have added this extent before */
7516         if (!cache)
7517                 return -ENOENT;
7518
7519         rec = container_of(cache, struct extent_record, cache);
7520
7521         /*
7522          * Except file/reloc tree, we can not have
7523          * FULL BACKREF MODE
7524          */
7525         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7526                 goto normal;
7527         /*
7528          * root node
7529          */
7530         if (buf->start == ri->bytenr)
7531                 goto normal;
7532
7533         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7534                 goto full_backref;
7535
7536         owner = btrfs_header_owner(buf);
7537         if (owner == ri->objectid)
7538                 goto normal;
7539
7540         tback = find_tree_backref(rec, 0, owner);
7541         if (!tback)
7542                 goto full_backref;
7543 normal:
7544         *flags = 0;
7545         if (rec->flag_block_full_backref != FLAG_UNSET &&
7546             rec->flag_block_full_backref != 0)
7547                 rec->bad_full_backref = 1;
7548         return 0;
7549 full_backref:
7550         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7551         if (rec->flag_block_full_backref != FLAG_UNSET &&
7552             rec->flag_block_full_backref != 1)
7553                 rec->bad_full_backref = 1;
7554         return 0;
7555 }
7556
7557 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7558 {
7559         fprintf(stderr, "Invalid key type(");
7560         print_key_type(stderr, 0, key_type);
7561         fprintf(stderr, ") found in root(");
7562         print_objectid(stderr, rootid, 0);
7563         fprintf(stderr, ")\n");
7564 }
7565
7566 /*
7567  * Check if the key is valid with its extent buffer.
7568  *
7569  * This is a early check in case invalid key exists in a extent buffer
7570  * This is not comprehensive yet, but should prevent wrong key/item passed
7571  * further
7572  */
7573 static int check_type_with_root(u64 rootid, u8 key_type)
7574 {
7575         switch (key_type) {
7576         /* Only valid in chunk tree */
7577         case BTRFS_DEV_ITEM_KEY:
7578         case BTRFS_CHUNK_ITEM_KEY:
7579                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7580                         goto err;
7581                 break;
7582         /* valid in csum and log tree */
7583         case BTRFS_CSUM_TREE_OBJECTID:
7584                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7585                       is_fstree(rootid)))
7586                         goto err;
7587                 break;
7588         case BTRFS_EXTENT_ITEM_KEY:
7589         case BTRFS_METADATA_ITEM_KEY:
7590         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7591                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7592                         goto err;
7593                 break;
7594         case BTRFS_ROOT_ITEM_KEY:
7595                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7596                         goto err;
7597                 break;
7598         case BTRFS_DEV_EXTENT_KEY:
7599                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7600                         goto err;
7601                 break;
7602         }
7603         return 0;
7604 err:
7605         report_mismatch_key_root(key_type, rootid);
7606         return -EINVAL;
7607 }
7608
7609 static int run_next_block(struct btrfs_root *root,
7610                           struct block_info *bits,
7611                           int bits_nr,
7612                           u64 *last,
7613                           struct cache_tree *pending,
7614                           struct cache_tree *seen,
7615                           struct cache_tree *reada,
7616                           struct cache_tree *nodes,
7617                           struct cache_tree *extent_cache,
7618                           struct cache_tree *chunk_cache,
7619                           struct rb_root *dev_cache,
7620                           struct block_group_tree *block_group_cache,
7621                           struct device_extent_tree *dev_extent_cache,
7622                           struct root_item_record *ri)
7623 {
7624         struct btrfs_fs_info *fs_info = root->fs_info;
7625         struct extent_buffer *buf;
7626         struct extent_record *rec = NULL;
7627         u64 bytenr;
7628         u32 size;
7629         u64 parent;
7630         u64 owner;
7631         u64 flags;
7632         u64 ptr;
7633         u64 gen = 0;
7634         int ret = 0;
7635         int i;
7636         int nritems;
7637         struct btrfs_key key;
7638         struct cache_extent *cache;
7639         int reada_bits;
7640
7641         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7642                                     bits_nr, &reada_bits);
7643         if (nritems == 0)
7644                 return 1;
7645
7646         if (!reada_bits) {
7647                 for(i = 0; i < nritems; i++) {
7648                         ret = add_cache_extent(reada, bits[i].start,
7649                                                bits[i].size);
7650                         if (ret == -EEXIST)
7651                                 continue;
7652
7653                         /* fixme, get the parent transid */
7654                         readahead_tree_block(fs_info, bits[i].start,
7655                                              bits[i].size, 0);
7656                 }
7657         }
7658         *last = bits[0].start;
7659         bytenr = bits[0].start;
7660         size = bits[0].size;
7661
7662         cache = lookup_cache_extent(pending, bytenr, size);
7663         if (cache) {
7664                 remove_cache_extent(pending, cache);
7665                 free(cache);
7666         }
7667         cache = lookup_cache_extent(reada, bytenr, size);
7668         if (cache) {
7669                 remove_cache_extent(reada, cache);
7670                 free(cache);
7671         }
7672         cache = lookup_cache_extent(nodes, bytenr, size);
7673         if (cache) {
7674                 remove_cache_extent(nodes, cache);
7675                 free(cache);
7676         }
7677         cache = lookup_cache_extent(extent_cache, bytenr, size);
7678         if (cache) {
7679                 rec = container_of(cache, struct extent_record, cache);
7680                 gen = rec->parent_generation;
7681         }
7682
7683         /* fixme, get the real parent transid */
7684         buf = read_tree_block(root->fs_info, bytenr, gen);
7685         if (!extent_buffer_uptodate(buf)) {
7686                 record_bad_block_io(root->fs_info,
7687                                     extent_cache, bytenr, size);
7688                 goto out;
7689         }
7690
7691         nritems = btrfs_header_nritems(buf);
7692
7693         flags = 0;
7694         if (!init_extent_tree) {
7695                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7696                                        btrfs_header_level(buf), 1, NULL,
7697                                        &flags);
7698                 if (ret < 0) {
7699                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7700                         if (ret < 0) {
7701                                 fprintf(stderr, "Couldn't calc extent flags\n");
7702                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7703                         }
7704                 }
7705         } else {
7706                 flags = 0;
7707                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7708                 if (ret < 0) {
7709                         fprintf(stderr, "Couldn't calc extent flags\n");
7710                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7711                 }
7712         }
7713
7714         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7715                 if (ri != NULL &&
7716                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7717                     ri->objectid == btrfs_header_owner(buf)) {
7718                         /*
7719                          * Ok we got to this block from it's original owner and
7720                          * we have FULL_BACKREF set.  Relocation can leave
7721                          * converted blocks over so this is altogether possible,
7722                          * however it's not possible if the generation > the
7723                          * last snapshot, so check for this case.
7724                          */
7725                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7726                             btrfs_header_generation(buf) > ri->last_snapshot) {
7727                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7728                                 rec->bad_full_backref = 1;
7729                         }
7730                 }
7731         } else {
7732                 if (ri != NULL &&
7733                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7734                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7735                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7736                         rec->bad_full_backref = 1;
7737                 }
7738         }
7739
7740         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7741                 rec->flag_block_full_backref = 1;
7742                 parent = bytenr;
7743                 owner = 0;
7744         } else {
7745                 rec->flag_block_full_backref = 0;
7746                 parent = 0;
7747                 owner = btrfs_header_owner(buf);
7748         }
7749
7750         ret = check_block(root, extent_cache, buf, flags);
7751         if (ret)
7752                 goto out;
7753
7754         if (btrfs_is_leaf(buf)) {
7755                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7756                 for (i = 0; i < nritems; i++) {
7757                         struct btrfs_file_extent_item *fi;
7758                         btrfs_item_key_to_cpu(buf, &key, i);
7759                         /*
7760                          * Check key type against the leaf owner.
7761                          * Could filter quite a lot of early error if
7762                          * owner is correct
7763                          */
7764                         if (check_type_with_root(btrfs_header_owner(buf),
7765                                                  key.type)) {
7766                                 fprintf(stderr, "ignoring invalid key\n");
7767                                 continue;
7768                         }
7769                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7770                                 process_extent_item(root, extent_cache, buf,
7771                                                     i);
7772                                 continue;
7773                         }
7774                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7775                                 process_extent_item(root, extent_cache, buf,
7776                                                     i);
7777                                 continue;
7778                         }
7779                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7780                                 total_csum_bytes +=
7781                                         btrfs_item_size_nr(buf, i);
7782                                 continue;
7783                         }
7784                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7785                                 process_chunk_item(chunk_cache, &key, buf, i);
7786                                 continue;
7787                         }
7788                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7789                                 process_device_item(dev_cache, &key, buf, i);
7790                                 continue;
7791                         }
7792                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7793                                 process_block_group_item(block_group_cache,
7794                                         &key, buf, i);
7795                                 continue;
7796                         }
7797                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7798                                 process_device_extent_item(dev_extent_cache,
7799                                         &key, buf, i);
7800                                 continue;
7801
7802                         }
7803                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7804 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7805                                 process_extent_ref_v0(extent_cache, buf, i);
7806 #else
7807                                 BUG();
7808 #endif
7809                                 continue;
7810                         }
7811
7812                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7813                                 ret = add_tree_backref(extent_cache,
7814                                                 key.objectid, 0, key.offset, 0);
7815                                 if (ret < 0)
7816                                         error(
7817                                 "add_tree_backref failed (leaf tree block): %s",
7818                                               strerror(-ret));
7819                                 continue;
7820                         }
7821                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7822                                 ret = add_tree_backref(extent_cache,
7823                                                 key.objectid, key.offset, 0, 0);
7824                                 if (ret < 0)
7825                                         error(
7826                                 "add_tree_backref failed (leaf shared block): %s",
7827                                               strerror(-ret));
7828                                 continue;
7829                         }
7830                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7831                                 struct btrfs_extent_data_ref *ref;
7832                                 ref = btrfs_item_ptr(buf, i,
7833                                                 struct btrfs_extent_data_ref);
7834                                 add_data_backref(extent_cache,
7835                                         key.objectid, 0,
7836                                         btrfs_extent_data_ref_root(buf, ref),
7837                                         btrfs_extent_data_ref_objectid(buf,
7838                                                                        ref),
7839                                         btrfs_extent_data_ref_offset(buf, ref),
7840                                         btrfs_extent_data_ref_count(buf, ref),
7841                                         0, root->fs_info->sectorsize);
7842                                 continue;
7843                         }
7844                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7845                                 struct btrfs_shared_data_ref *ref;
7846                                 ref = btrfs_item_ptr(buf, i,
7847                                                 struct btrfs_shared_data_ref);
7848                                 add_data_backref(extent_cache,
7849                                         key.objectid, key.offset, 0, 0, 0,
7850                                         btrfs_shared_data_ref_count(buf, ref),
7851                                         0, root->fs_info->sectorsize);
7852                                 continue;
7853                         }
7854                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7855                                 struct bad_item *bad;
7856
7857                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7858                                         continue;
7859                                 if (!owner)
7860                                         continue;
7861                                 bad = malloc(sizeof(struct bad_item));
7862                                 if (!bad)
7863                                         continue;
7864                                 INIT_LIST_HEAD(&bad->list);
7865                                 memcpy(&bad->key, &key,
7866                                        sizeof(struct btrfs_key));
7867                                 bad->root_id = owner;
7868                                 list_add_tail(&bad->list, &delete_items);
7869                                 continue;
7870                         }
7871                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7872                                 continue;
7873                         fi = btrfs_item_ptr(buf, i,
7874                                             struct btrfs_file_extent_item);
7875                         if (btrfs_file_extent_type(buf, fi) ==
7876                             BTRFS_FILE_EXTENT_INLINE)
7877                                 continue;
7878                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7879                                 continue;
7880
7881                         data_bytes_allocated +=
7882                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7883                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7884                                 abort();
7885                         }
7886                         data_bytes_referenced +=
7887                                 btrfs_file_extent_num_bytes(buf, fi);
7888                         add_data_backref(extent_cache,
7889                                 btrfs_file_extent_disk_bytenr(buf, fi),
7890                                 parent, owner, key.objectid, key.offset -
7891                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7892                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7893                 }
7894         } else {
7895                 int level;
7896                 struct btrfs_key first_key;
7897
7898                 first_key.objectid = 0;
7899
7900                 if (nritems > 0)
7901                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7902                 level = btrfs_header_level(buf);
7903                 for (i = 0; i < nritems; i++) {
7904                         struct extent_record tmpl;
7905
7906                         ptr = btrfs_node_blockptr(buf, i);
7907                         size = root->fs_info->nodesize;
7908                         btrfs_node_key_to_cpu(buf, &key, i);
7909                         if (ri != NULL) {
7910                                 if ((level == ri->drop_level)
7911                                     && is_dropped_key(&key, &ri->drop_key)) {
7912                                         continue;
7913                                 }
7914                         }
7915
7916                         memset(&tmpl, 0, sizeof(tmpl));
7917                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7918                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7919                         tmpl.start = ptr;
7920                         tmpl.nr = size;
7921                         tmpl.refs = 1;
7922                         tmpl.metadata = 1;
7923                         tmpl.max_size = size;
7924                         ret = add_extent_rec(extent_cache, &tmpl);
7925                         if (ret < 0)
7926                                 goto out;
7927
7928                         ret = add_tree_backref(extent_cache, ptr, parent,
7929                                         owner, 1);
7930                         if (ret < 0) {
7931                                 error(
7932                                 "add_tree_backref failed (non-leaf block): %s",
7933                                       strerror(-ret));
7934                                 continue;
7935                         }
7936
7937                         if (level > 1) {
7938                                 add_pending(nodes, seen, ptr, size);
7939                         } else {
7940                                 add_pending(pending, seen, ptr, size);
7941                         }
7942                 }
7943                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7944                                       nritems) * sizeof(struct btrfs_key_ptr);
7945         }
7946         total_btree_bytes += buf->len;
7947         if (fs_root_objectid(btrfs_header_owner(buf)))
7948                 total_fs_tree_bytes += buf->len;
7949         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7950                 total_extent_tree_bytes += buf->len;
7951         if (!found_old_backref &&
7952             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7953             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7954             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7955                 found_old_backref = 1;
7956 out:
7957         free_extent_buffer(buf);
7958         return ret;
7959 }
7960
7961 static int add_root_to_pending(struct extent_buffer *buf,
7962                                struct cache_tree *extent_cache,
7963                                struct cache_tree *pending,
7964                                struct cache_tree *seen,
7965                                struct cache_tree *nodes,
7966                                u64 objectid)
7967 {
7968         struct extent_record tmpl;
7969         int ret;
7970
7971         if (btrfs_header_level(buf) > 0)
7972                 add_pending(nodes, seen, buf->start, buf->len);
7973         else
7974                 add_pending(pending, seen, buf->start, buf->len);
7975
7976         memset(&tmpl, 0, sizeof(tmpl));
7977         tmpl.start = buf->start;
7978         tmpl.nr = buf->len;
7979         tmpl.is_root = 1;
7980         tmpl.refs = 1;
7981         tmpl.metadata = 1;
7982         tmpl.max_size = buf->len;
7983         add_extent_rec(extent_cache, &tmpl);
7984
7985         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7986             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7987                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7988                                 0, 1);
7989         else
7990                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7991                                 1);
7992         return ret;
7993 }
7994
7995 /* as we fix the tree, we might be deleting blocks that
7996  * we're tracking for repair.  This hook makes sure we
7997  * remove any backrefs for blocks as we are fixing them.
7998  */
7999 static int free_extent_hook(struct btrfs_trans_handle *trans,
8000                             struct btrfs_root *root,
8001                             u64 bytenr, u64 num_bytes, u64 parent,
8002                             u64 root_objectid, u64 owner, u64 offset,
8003                             int refs_to_drop)
8004 {
8005         struct extent_record *rec;
8006         struct cache_extent *cache;
8007         int is_data;
8008         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8009
8010         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8011         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8012         if (!cache)
8013                 return 0;
8014
8015         rec = container_of(cache, struct extent_record, cache);
8016         if (is_data) {
8017                 struct data_backref *back;
8018                 back = find_data_backref(rec, parent, root_objectid, owner,
8019                                          offset, 1, bytenr, num_bytes);
8020                 if (!back)
8021                         goto out;
8022                 if (back->node.found_ref) {
8023                         back->found_ref -= refs_to_drop;
8024                         if (rec->refs)
8025                                 rec->refs -= refs_to_drop;
8026                 }
8027                 if (back->node.found_extent_tree) {
8028                         back->num_refs -= refs_to_drop;
8029                         if (rec->extent_item_refs)
8030                                 rec->extent_item_refs -= refs_to_drop;
8031                 }
8032                 if (back->found_ref == 0)
8033                         back->node.found_ref = 0;
8034                 if (back->num_refs == 0)
8035                         back->node.found_extent_tree = 0;
8036
8037                 if (!back->node.found_extent_tree && back->node.found_ref) {
8038                         list_del(&back->node.list);
8039                         free(back);
8040                 }
8041         } else {
8042                 struct tree_backref *back;
8043                 back = find_tree_backref(rec, parent, root_objectid);
8044                 if (!back)
8045                         goto out;
8046                 if (back->node.found_ref) {
8047                         if (rec->refs)
8048                                 rec->refs--;
8049                         back->node.found_ref = 0;
8050                 }
8051                 if (back->node.found_extent_tree) {
8052                         if (rec->extent_item_refs)
8053                                 rec->extent_item_refs--;
8054                         back->node.found_extent_tree = 0;
8055                 }
8056                 if (!back->node.found_extent_tree && back->node.found_ref) {
8057                         list_del(&back->node.list);
8058                         free(back);
8059                 }
8060         }
8061         maybe_free_extent_rec(extent_cache, rec);
8062 out:
8063         return 0;
8064 }
8065
8066 static int delete_extent_records(struct btrfs_trans_handle *trans,
8067                                  struct btrfs_root *root,
8068                                  struct btrfs_path *path,
8069                                  u64 bytenr)
8070 {
8071         struct btrfs_key key;
8072         struct btrfs_key found_key;
8073         struct extent_buffer *leaf;
8074         int ret;
8075         int slot;
8076
8077
8078         key.objectid = bytenr;
8079         key.type = (u8)-1;
8080         key.offset = (u64)-1;
8081
8082         while(1) {
8083                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8084                                         &key, path, 0, 1);
8085                 if (ret < 0)
8086                         break;
8087
8088                 if (ret > 0) {
8089                         ret = 0;
8090                         if (path->slots[0] == 0)
8091                                 break;
8092                         path->slots[0]--;
8093                 }
8094                 ret = 0;
8095
8096                 leaf = path->nodes[0];
8097                 slot = path->slots[0];
8098
8099                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8100                 if (found_key.objectid != bytenr)
8101                         break;
8102
8103                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8104                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8105                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8106                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8107                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8108                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8109                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8110                         btrfs_release_path(path);
8111                         if (found_key.type == 0) {
8112                                 if (found_key.offset == 0)
8113                                         break;
8114                                 key.offset = found_key.offset - 1;
8115                                 key.type = found_key.type;
8116                         }
8117                         key.type = found_key.type - 1;
8118                         key.offset = (u64)-1;
8119                         continue;
8120                 }
8121
8122                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8123                         found_key.objectid, found_key.type, found_key.offset);
8124
8125                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8126                 if (ret)
8127                         break;
8128                 btrfs_release_path(path);
8129
8130                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8131                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8132                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8133                                 found_key.offset : root->fs_info->nodesize;
8134
8135                         ret = btrfs_update_block_group(trans, root, bytenr,
8136                                                        bytes, 0, 0);
8137                         if (ret)
8138                                 break;
8139                 }
8140         }
8141
8142         btrfs_release_path(path);
8143         return ret;
8144 }
8145
8146 /*
8147  * for a single backref, this will allocate a new extent
8148  * and add the backref to it.
8149  */
8150 static int record_extent(struct btrfs_trans_handle *trans,
8151                          struct btrfs_fs_info *info,
8152                          struct btrfs_path *path,
8153                          struct extent_record *rec,
8154                          struct extent_backref *back,
8155                          int allocated, u64 flags)
8156 {
8157         int ret = 0;
8158         struct btrfs_root *extent_root = info->extent_root;
8159         struct extent_buffer *leaf;
8160         struct btrfs_key ins_key;
8161         struct btrfs_extent_item *ei;
8162         struct data_backref *dback;
8163         struct btrfs_tree_block_info *bi;
8164
8165         if (!back->is_data)
8166                 rec->max_size = max_t(u64, rec->max_size,
8167                                     info->nodesize);
8168
8169         if (!allocated) {
8170                 u32 item_size = sizeof(*ei);
8171
8172                 if (!back->is_data)
8173                         item_size += sizeof(*bi);
8174
8175                 ins_key.objectid = rec->start;
8176                 ins_key.offset = rec->max_size;
8177                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8178
8179                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8180                                         &ins_key, item_size);
8181                 if (ret)
8182                         goto fail;
8183
8184                 leaf = path->nodes[0];
8185                 ei = btrfs_item_ptr(leaf, path->slots[0],
8186                                     struct btrfs_extent_item);
8187
8188                 btrfs_set_extent_refs(leaf, ei, 0);
8189                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8190
8191                 if (back->is_data) {
8192                         btrfs_set_extent_flags(leaf, ei,
8193                                                BTRFS_EXTENT_FLAG_DATA);
8194                 } else {
8195                         struct btrfs_disk_key copy_key;;
8196
8197                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8198                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8199                                              sizeof(*bi));
8200
8201                         btrfs_set_disk_key_objectid(&copy_key,
8202                                                     rec->info_objectid);
8203                         btrfs_set_disk_key_type(&copy_key, 0);
8204                         btrfs_set_disk_key_offset(&copy_key, 0);
8205
8206                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8207                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8208
8209                         btrfs_set_extent_flags(leaf, ei,
8210                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8211                 }
8212
8213                 btrfs_mark_buffer_dirty(leaf);
8214                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8215                                                rec->max_size, 1, 0);
8216                 if (ret)
8217                         goto fail;
8218                 btrfs_release_path(path);
8219         }
8220
8221         if (back->is_data) {
8222                 u64 parent;
8223                 int i;
8224
8225                 dback = to_data_backref(back);
8226                 if (back->full_backref)
8227                         parent = dback->parent;
8228                 else
8229                         parent = 0;
8230
8231                 for (i = 0; i < dback->found_ref; i++) {
8232                         /* if parent != 0, we're doing a full backref
8233                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8234                          * just makes the backref allocator create a data
8235                          * backref
8236                          */
8237                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8238                                                    rec->start, rec->max_size,
8239                                                    parent,
8240                                                    dback->root,
8241                                                    parent ?
8242                                                    BTRFS_FIRST_FREE_OBJECTID :
8243                                                    dback->owner,
8244                                                    dback->offset);
8245                         if (ret)
8246                                 break;
8247                 }
8248                 fprintf(stderr, "adding new data backref"
8249                                 " on %llu %s %llu owner %llu"
8250                                 " offset %llu found %d\n",
8251                                 (unsigned long long)rec->start,
8252                                 back->full_backref ?
8253                                 "parent" : "root",
8254                                 back->full_backref ?
8255                                 (unsigned long long)parent :
8256                                 (unsigned long long)dback->root,
8257                                 (unsigned long long)dback->owner,
8258                                 (unsigned long long)dback->offset,
8259                                 dback->found_ref);
8260         } else {
8261                 u64 parent;
8262                 struct tree_backref *tback;
8263
8264                 tback = to_tree_backref(back);
8265                 if (back->full_backref)
8266                         parent = tback->parent;
8267                 else
8268                         parent = 0;
8269
8270                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8271                                            rec->start, rec->max_size,
8272                                            parent, tback->root, 0, 0);
8273                 fprintf(stderr, "adding new tree backref on "
8274                         "start %llu len %llu parent %llu root %llu\n",
8275                         rec->start, rec->max_size, parent, tback->root);
8276         }
8277 fail:
8278         btrfs_release_path(path);
8279         return ret;
8280 }
8281
8282 static struct extent_entry *find_entry(struct list_head *entries,
8283                                        u64 bytenr, u64 bytes)
8284 {
8285         struct extent_entry *entry = NULL;
8286
8287         list_for_each_entry(entry, entries, list) {
8288                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8289                         return entry;
8290         }
8291
8292         return NULL;
8293 }
8294
8295 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8296 {
8297         struct extent_entry *entry, *best = NULL, *prev = NULL;
8298
8299         list_for_each_entry(entry, entries, list) {
8300                 /*
8301                  * If there are as many broken entries as entries then we know
8302                  * not to trust this particular entry.
8303                  */
8304                 if (entry->broken == entry->count)
8305                         continue;
8306
8307                 /*
8308                  * Special case, when there are only two entries and 'best' is
8309                  * the first one
8310                  */
8311                 if (!prev) {
8312                         best = entry;
8313                         prev = entry;
8314                         continue;
8315                 }
8316
8317                 /*
8318                  * If our current entry == best then we can't be sure our best
8319                  * is really the best, so we need to keep searching.
8320                  */
8321                 if (best && best->count == entry->count) {
8322                         prev = entry;
8323                         best = NULL;
8324                         continue;
8325                 }
8326
8327                 /* Prev == entry, not good enough, have to keep searching */
8328                 if (!prev->broken && prev->count == entry->count)
8329                         continue;
8330
8331                 if (!best)
8332                         best = (prev->count > entry->count) ? prev : entry;
8333                 else if (best->count < entry->count)
8334                         best = entry;
8335                 prev = entry;
8336         }
8337
8338         return best;
8339 }
8340
8341 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8342                       struct data_backref *dback, struct extent_entry *entry)
8343 {
8344         struct btrfs_trans_handle *trans;
8345         struct btrfs_root *root;
8346         struct btrfs_file_extent_item *fi;
8347         struct extent_buffer *leaf;
8348         struct btrfs_key key;
8349         u64 bytenr, bytes;
8350         int ret, err;
8351
8352         key.objectid = dback->root;
8353         key.type = BTRFS_ROOT_ITEM_KEY;
8354         key.offset = (u64)-1;
8355         root = btrfs_read_fs_root(info, &key);
8356         if (IS_ERR(root)) {
8357                 fprintf(stderr, "Couldn't find root for our ref\n");
8358                 return -EINVAL;
8359         }
8360
8361         /*
8362          * The backref points to the original offset of the extent if it was
8363          * split, so we need to search down to the offset we have and then walk
8364          * forward until we find the backref we're looking for.
8365          */
8366         key.objectid = dback->owner;
8367         key.type = BTRFS_EXTENT_DATA_KEY;
8368         key.offset = dback->offset;
8369         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8370         if (ret < 0) {
8371                 fprintf(stderr, "Error looking up ref %d\n", ret);
8372                 return ret;
8373         }
8374
8375         while (1) {
8376                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8377                         ret = btrfs_next_leaf(root, path);
8378                         if (ret) {
8379                                 fprintf(stderr, "Couldn't find our ref, next\n");
8380                                 return -EINVAL;
8381                         }
8382                 }
8383                 leaf = path->nodes[0];
8384                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8385                 if (key.objectid != dback->owner ||
8386                     key.type != BTRFS_EXTENT_DATA_KEY) {
8387                         fprintf(stderr, "Couldn't find our ref, search\n");
8388                         return -EINVAL;
8389                 }
8390                 fi = btrfs_item_ptr(leaf, path->slots[0],
8391                                     struct btrfs_file_extent_item);
8392                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8393                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8394
8395                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8396                         break;
8397                 path->slots[0]++;
8398         }
8399
8400         btrfs_release_path(path);
8401
8402         trans = btrfs_start_transaction(root, 1);
8403         if (IS_ERR(trans))
8404                 return PTR_ERR(trans);
8405
8406         /*
8407          * Ok we have the key of the file extent we want to fix, now we can cow
8408          * down to the thing and fix it.
8409          */
8410         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8411         if (ret < 0) {
8412                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8413                         key.objectid, key.type, key.offset, ret);
8414                 goto out;
8415         }
8416         if (ret > 0) {
8417                 fprintf(stderr, "Well that's odd, we just found this key "
8418                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8419                         key.offset);
8420                 ret = -EINVAL;
8421                 goto out;
8422         }
8423         leaf = path->nodes[0];
8424         fi = btrfs_item_ptr(leaf, path->slots[0],
8425                             struct btrfs_file_extent_item);
8426
8427         if (btrfs_file_extent_compression(leaf, fi) &&
8428             dback->disk_bytenr != entry->bytenr) {
8429                 fprintf(stderr, "Ref doesn't match the record start and is "
8430                         "compressed, please take a btrfs-image of this file "
8431                         "system and send it to a btrfs developer so they can "
8432                         "complete this functionality for bytenr %Lu\n",
8433                         dback->disk_bytenr);
8434                 ret = -EINVAL;
8435                 goto out;
8436         }
8437
8438         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8439                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8440         } else if (dback->disk_bytenr > entry->bytenr) {
8441                 u64 off_diff, offset;
8442
8443                 off_diff = dback->disk_bytenr - entry->bytenr;
8444                 offset = btrfs_file_extent_offset(leaf, fi);
8445                 if (dback->disk_bytenr + offset +
8446                     btrfs_file_extent_num_bytes(leaf, fi) >
8447                     entry->bytenr + entry->bytes) {
8448                         fprintf(stderr, "Ref is past the entry end, please "
8449                                 "take a btrfs-image of this file system and "
8450                                 "send it to a btrfs developer, ref %Lu\n",
8451                                 dback->disk_bytenr);
8452                         ret = -EINVAL;
8453                         goto out;
8454                 }
8455                 offset += off_diff;
8456                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8457                 btrfs_set_file_extent_offset(leaf, fi, offset);
8458         } else if (dback->disk_bytenr < entry->bytenr) {
8459                 u64 offset;
8460
8461                 offset = btrfs_file_extent_offset(leaf, fi);
8462                 if (dback->disk_bytenr + offset < entry->bytenr) {
8463                         fprintf(stderr, "Ref is before the entry start, please"
8464                                 " take a btrfs-image of this file system and "
8465                                 "send it to a btrfs developer, ref %Lu\n",
8466                                 dback->disk_bytenr);
8467                         ret = -EINVAL;
8468                         goto out;
8469                 }
8470
8471                 offset += dback->disk_bytenr;
8472                 offset -= entry->bytenr;
8473                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8474                 btrfs_set_file_extent_offset(leaf, fi, offset);
8475         }
8476
8477         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8478
8479         /*
8480          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8481          * only do this if we aren't using compression, otherwise it's a
8482          * trickier case.
8483          */
8484         if (!btrfs_file_extent_compression(leaf, fi))
8485                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8486         else
8487                 printf("ram bytes may be wrong?\n");
8488         btrfs_mark_buffer_dirty(leaf);
8489 out:
8490         err = btrfs_commit_transaction(trans, root);
8491         btrfs_release_path(path);
8492         return ret ? ret : err;
8493 }
8494
8495 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8496                            struct extent_record *rec)
8497 {
8498         struct extent_backref *back;
8499         struct data_backref *dback;
8500         struct extent_entry *entry, *best = NULL;
8501         LIST_HEAD(entries);
8502         int nr_entries = 0;
8503         int broken_entries = 0;
8504         int ret = 0;
8505         short mismatch = 0;
8506
8507         /*
8508          * Metadata is easy and the backrefs should always agree on bytenr and
8509          * size, if not we've got bigger issues.
8510          */
8511         if (rec->metadata)
8512                 return 0;
8513
8514         list_for_each_entry(back, &rec->backrefs, list) {
8515                 if (back->full_backref || !back->is_data)
8516                         continue;
8517
8518                 dback = to_data_backref(back);
8519
8520                 /*
8521                  * We only pay attention to backrefs that we found a real
8522                  * backref for.
8523                  */
8524                 if (dback->found_ref == 0)
8525                         continue;
8526
8527                 /*
8528                  * For now we only catch when the bytes don't match, not the
8529                  * bytenr.  We can easily do this at the same time, but I want
8530                  * to have a fs image to test on before we just add repair
8531                  * functionality willy-nilly so we know we won't screw up the
8532                  * repair.
8533                  */
8534
8535                 entry = find_entry(&entries, dback->disk_bytenr,
8536                                    dback->bytes);
8537                 if (!entry) {
8538                         entry = malloc(sizeof(struct extent_entry));
8539                         if (!entry) {
8540                                 ret = -ENOMEM;
8541                                 goto out;
8542                         }
8543                         memset(entry, 0, sizeof(*entry));
8544                         entry->bytenr = dback->disk_bytenr;
8545                         entry->bytes = dback->bytes;
8546                         list_add_tail(&entry->list, &entries);
8547                         nr_entries++;
8548                 }
8549
8550                 /*
8551                  * If we only have on entry we may think the entries agree when
8552                  * in reality they don't so we have to do some extra checking.
8553                  */
8554                 if (dback->disk_bytenr != rec->start ||
8555                     dback->bytes != rec->nr || back->broken)
8556                         mismatch = 1;
8557
8558                 if (back->broken) {
8559                         entry->broken++;
8560                         broken_entries++;
8561                 }
8562
8563                 entry->count++;
8564         }
8565
8566         /* Yay all the backrefs agree, carry on good sir */
8567         if (nr_entries <= 1 && !mismatch)
8568                 goto out;
8569
8570         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8571                 "%Lu\n", rec->start);
8572
8573         /*
8574          * First we want to see if the backrefs can agree amongst themselves who
8575          * is right, so figure out which one of the entries has the highest
8576          * count.
8577          */
8578         best = find_most_right_entry(&entries);
8579
8580         /*
8581          * Ok so we may have an even split between what the backrefs think, so
8582          * this is where we use the extent ref to see what it thinks.
8583          */
8584         if (!best) {
8585                 entry = find_entry(&entries, rec->start, rec->nr);
8586                 if (!entry && (!broken_entries || !rec->found_rec)) {
8587                         fprintf(stderr, "Backrefs don't agree with each other "
8588                                 "and extent record doesn't agree with anybody,"
8589                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8590                                 rec->start, rec->nr);
8591                         ret = -EINVAL;
8592                         goto out;
8593                 } else if (!entry) {
8594                         /*
8595                          * Ok our backrefs were broken, we'll assume this is the
8596                          * correct value and add an entry for this range.
8597                          */
8598                         entry = malloc(sizeof(struct extent_entry));
8599                         if (!entry) {
8600                                 ret = -ENOMEM;
8601                                 goto out;
8602                         }
8603                         memset(entry, 0, sizeof(*entry));
8604                         entry->bytenr = rec->start;
8605                         entry->bytes = rec->nr;
8606                         list_add_tail(&entry->list, &entries);
8607                         nr_entries++;
8608                 }
8609                 entry->count++;
8610                 best = find_most_right_entry(&entries);
8611                 if (!best) {
8612                         fprintf(stderr, "Backrefs and extent record evenly "
8613                                 "split on who is right, this is going to "
8614                                 "require user input to fix bytenr %Lu bytes "
8615                                 "%Lu\n", rec->start, rec->nr);
8616                         ret = -EINVAL;
8617                         goto out;
8618                 }
8619         }
8620
8621         /*
8622          * I don't think this can happen currently as we'll abort() if we catch
8623          * this case higher up, but in case somebody removes that we still can't
8624          * deal with it properly here yet, so just bail out of that's the case.
8625          */
8626         if (best->bytenr != rec->start) {
8627                 fprintf(stderr, "Extent start and backref starts don't match, "
8628                         "please use btrfs-image on this file system and send "
8629                         "it to a btrfs developer so they can make fsck fix "
8630                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8631                         rec->start, rec->nr);
8632                 ret = -EINVAL;
8633                 goto out;
8634         }
8635
8636         /*
8637          * Ok great we all agreed on an extent record, let's go find the real
8638          * references and fix up the ones that don't match.
8639          */
8640         list_for_each_entry(back, &rec->backrefs, list) {
8641                 if (back->full_backref || !back->is_data)
8642                         continue;
8643
8644                 dback = to_data_backref(back);
8645
8646                 /*
8647                  * Still ignoring backrefs that don't have a real ref attached
8648                  * to them.
8649                  */
8650                 if (dback->found_ref == 0)
8651                         continue;
8652
8653                 if (dback->bytes == best->bytes &&
8654                     dback->disk_bytenr == best->bytenr)
8655                         continue;
8656
8657                 ret = repair_ref(info, path, dback, best);
8658                 if (ret)
8659                         goto out;
8660         }
8661
8662         /*
8663          * Ok we messed with the actual refs, which means we need to drop our
8664          * entire cache and go back and rescan.  I know this is a huge pain and
8665          * adds a lot of extra work, but it's the only way to be safe.  Once all
8666          * the backrefs agree we may not need to do anything to the extent
8667          * record itself.
8668          */
8669         ret = -EAGAIN;
8670 out:
8671         while (!list_empty(&entries)) {
8672                 entry = list_entry(entries.next, struct extent_entry, list);
8673                 list_del_init(&entry->list);
8674                 free(entry);
8675         }
8676         return ret;
8677 }
8678
8679 static int process_duplicates(struct cache_tree *extent_cache,
8680                               struct extent_record *rec)
8681 {
8682         struct extent_record *good, *tmp;
8683         struct cache_extent *cache;
8684         int ret;
8685
8686         /*
8687          * If we found a extent record for this extent then return, or if we
8688          * have more than one duplicate we are likely going to need to delete
8689          * something.
8690          */
8691         if (rec->found_rec || rec->num_duplicates > 1)
8692                 return 0;
8693
8694         /* Shouldn't happen but just in case */
8695         BUG_ON(!rec->num_duplicates);
8696
8697         /*
8698          * So this happens if we end up with a backref that doesn't match the
8699          * actual extent entry.  So either the backref is bad or the extent
8700          * entry is bad.  Either way we want to have the extent_record actually
8701          * reflect what we found in the extent_tree, so we need to take the
8702          * duplicate out and use that as the extent_record since the only way we
8703          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8704          */
8705         remove_cache_extent(extent_cache, &rec->cache);
8706
8707         good = to_extent_record(rec->dups.next);
8708         list_del_init(&good->list);
8709         INIT_LIST_HEAD(&good->backrefs);
8710         INIT_LIST_HEAD(&good->dups);
8711         good->cache.start = good->start;
8712         good->cache.size = good->nr;
8713         good->content_checked = 0;
8714         good->owner_ref_checked = 0;
8715         good->num_duplicates = 0;
8716         good->refs = rec->refs;
8717         list_splice_init(&rec->backrefs, &good->backrefs);
8718         while (1) {
8719                 cache = lookup_cache_extent(extent_cache, good->start,
8720                                             good->nr);
8721                 if (!cache)
8722                         break;
8723                 tmp = container_of(cache, struct extent_record, cache);
8724
8725                 /*
8726                  * If we find another overlapping extent and it's found_rec is
8727                  * set then it's a duplicate and we need to try and delete
8728                  * something.
8729                  */
8730                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8731                         if (list_empty(&good->list))
8732                                 list_add_tail(&good->list,
8733                                               &duplicate_extents);
8734                         good->num_duplicates += tmp->num_duplicates + 1;
8735                         list_splice_init(&tmp->dups, &good->dups);
8736                         list_del_init(&tmp->list);
8737                         list_add_tail(&tmp->list, &good->dups);
8738                         remove_cache_extent(extent_cache, &tmp->cache);
8739                         continue;
8740                 }
8741
8742                 /*
8743                  * Ok we have another non extent item backed extent rec, so lets
8744                  * just add it to this extent and carry on like we did above.
8745                  */
8746                 good->refs += tmp->refs;
8747                 list_splice_init(&tmp->backrefs, &good->backrefs);
8748                 remove_cache_extent(extent_cache, &tmp->cache);
8749                 free(tmp);
8750         }
8751         ret = insert_cache_extent(extent_cache, &good->cache);
8752         BUG_ON(ret);
8753         free(rec);
8754         return good->num_duplicates ? 0 : 1;
8755 }
8756
8757 static int delete_duplicate_records(struct btrfs_root *root,
8758                                     struct extent_record *rec)
8759 {
8760         struct btrfs_trans_handle *trans;
8761         LIST_HEAD(delete_list);
8762         struct btrfs_path path;
8763         struct extent_record *tmp, *good, *n;
8764         int nr_del = 0;
8765         int ret = 0, err;
8766         struct btrfs_key key;
8767
8768         btrfs_init_path(&path);
8769
8770         good = rec;
8771         /* Find the record that covers all of the duplicates. */
8772         list_for_each_entry(tmp, &rec->dups, list) {
8773                 if (good->start < tmp->start)
8774                         continue;
8775                 if (good->nr > tmp->nr)
8776                         continue;
8777
8778                 if (tmp->start + tmp->nr < good->start + good->nr) {
8779                         fprintf(stderr, "Ok we have overlapping extents that "
8780                                 "aren't completely covered by each other, this "
8781                                 "is going to require more careful thought.  "
8782                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8783                                 tmp->start, tmp->nr, good->start, good->nr);
8784                         abort();
8785                 }
8786                 good = tmp;
8787         }
8788
8789         if (good != rec)
8790                 list_add_tail(&rec->list, &delete_list);
8791
8792         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8793                 if (tmp == good)
8794                         continue;
8795                 list_move_tail(&tmp->list, &delete_list);
8796         }
8797
8798         root = root->fs_info->extent_root;
8799         trans = btrfs_start_transaction(root, 1);
8800         if (IS_ERR(trans)) {
8801                 ret = PTR_ERR(trans);
8802                 goto out;
8803         }
8804
8805         list_for_each_entry(tmp, &delete_list, list) {
8806                 if (tmp->found_rec == 0)
8807                         continue;
8808                 key.objectid = tmp->start;
8809                 key.type = BTRFS_EXTENT_ITEM_KEY;
8810                 key.offset = tmp->nr;
8811
8812                 /* Shouldn't happen but just in case */
8813                 if (tmp->metadata) {
8814                         fprintf(stderr, "Well this shouldn't happen, extent "
8815                                 "record overlaps but is metadata? "
8816                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8817                         abort();
8818                 }
8819
8820                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8821                 if (ret) {
8822                         if (ret > 0)
8823                                 ret = -EINVAL;
8824                         break;
8825                 }
8826                 ret = btrfs_del_item(trans, root, &path);
8827                 if (ret)
8828                         break;
8829                 btrfs_release_path(&path);
8830                 nr_del++;
8831         }
8832         err = btrfs_commit_transaction(trans, root);
8833         if (err && !ret)
8834                 ret = err;
8835 out:
8836         while (!list_empty(&delete_list)) {
8837                 tmp = to_extent_record(delete_list.next);
8838                 list_del_init(&tmp->list);
8839                 if (tmp == rec)
8840                         continue;
8841                 free(tmp);
8842         }
8843
8844         while (!list_empty(&rec->dups)) {
8845                 tmp = to_extent_record(rec->dups.next);
8846                 list_del_init(&tmp->list);
8847                 free(tmp);
8848         }
8849
8850         btrfs_release_path(&path);
8851
8852         if (!ret && !nr_del)
8853                 rec->num_duplicates = 0;
8854
8855         return ret ? ret : nr_del;
8856 }
8857
8858 static int find_possible_backrefs(struct btrfs_fs_info *info,
8859                                   struct btrfs_path *path,
8860                                   struct cache_tree *extent_cache,
8861                                   struct extent_record *rec)
8862 {
8863         struct btrfs_root *root;
8864         struct extent_backref *back;
8865         struct data_backref *dback;
8866         struct cache_extent *cache;
8867         struct btrfs_file_extent_item *fi;
8868         struct btrfs_key key;
8869         u64 bytenr, bytes;
8870         int ret;
8871
8872         list_for_each_entry(back, &rec->backrefs, list) {
8873                 /* Don't care about full backrefs (poor unloved backrefs) */
8874                 if (back->full_backref || !back->is_data)
8875                         continue;
8876
8877                 dback = to_data_backref(back);
8878
8879                 /* We found this one, we don't need to do a lookup */
8880                 if (dback->found_ref)
8881                         continue;
8882
8883                 key.objectid = dback->root;
8884                 key.type = BTRFS_ROOT_ITEM_KEY;
8885                 key.offset = (u64)-1;
8886
8887                 root = btrfs_read_fs_root(info, &key);
8888
8889                 /* No root, definitely a bad ref, skip */
8890                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8891                         continue;
8892                 /* Other err, exit */
8893                 if (IS_ERR(root))
8894                         return PTR_ERR(root);
8895
8896                 key.objectid = dback->owner;
8897                 key.type = BTRFS_EXTENT_DATA_KEY;
8898                 key.offset = dback->offset;
8899                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8900                 if (ret) {
8901                         btrfs_release_path(path);
8902                         if (ret < 0)
8903                                 return ret;
8904                         /* Didn't find it, we can carry on */
8905                         ret = 0;
8906                         continue;
8907                 }
8908
8909                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8910                                     struct btrfs_file_extent_item);
8911                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8912                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8913                 btrfs_release_path(path);
8914                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8915                 if (cache) {
8916                         struct extent_record *tmp;
8917                         tmp = container_of(cache, struct extent_record, cache);
8918
8919                         /*
8920                          * If we found an extent record for the bytenr for this
8921                          * particular backref then we can't add it to our
8922                          * current extent record.  We only want to add backrefs
8923                          * that don't have a corresponding extent item in the
8924                          * extent tree since they likely belong to this record
8925                          * and we need to fix it if it doesn't match bytenrs.
8926                          */
8927                         if  (tmp->found_rec)
8928                                 continue;
8929                 }
8930
8931                 dback->found_ref += 1;
8932                 dback->disk_bytenr = bytenr;
8933                 dback->bytes = bytes;
8934
8935                 /*
8936                  * Set this so the verify backref code knows not to trust the
8937                  * values in this backref.
8938                  */
8939                 back->broken = 1;
8940         }
8941
8942         return 0;
8943 }
8944
8945 /*
8946  * Record orphan data ref into corresponding root.
8947  *
8948  * Return 0 if the extent item contains data ref and recorded.
8949  * Return 1 if the extent item contains no useful data ref
8950  *   On that case, it may contains only shared_dataref or metadata backref
8951  *   or the file extent exists(this should be handled by the extent bytenr
8952  *   recovery routine)
8953  * Return <0 if something goes wrong.
8954  */
8955 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8956                                       struct extent_record *rec)
8957 {
8958         struct btrfs_key key;
8959         struct btrfs_root *dest_root;
8960         struct extent_backref *back;
8961         struct data_backref *dback;
8962         struct orphan_data_extent *orphan;
8963         struct btrfs_path path;
8964         int recorded_data_ref = 0;
8965         int ret = 0;
8966
8967         if (rec->metadata)
8968                 return 1;
8969         btrfs_init_path(&path);
8970         list_for_each_entry(back, &rec->backrefs, list) {
8971                 if (back->full_backref || !back->is_data ||
8972                     !back->found_extent_tree)
8973                         continue;
8974                 dback = to_data_backref(back);
8975                 if (dback->found_ref)
8976                         continue;
8977                 key.objectid = dback->root;
8978                 key.type = BTRFS_ROOT_ITEM_KEY;
8979                 key.offset = (u64)-1;
8980
8981                 dest_root = btrfs_read_fs_root(fs_info, &key);
8982
8983                 /* For non-exist root we just skip it */
8984                 if (IS_ERR(dest_root) || !dest_root)
8985                         continue;
8986
8987                 key.objectid = dback->owner;
8988                 key.type = BTRFS_EXTENT_DATA_KEY;
8989                 key.offset = dback->offset;
8990
8991                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8992                 btrfs_release_path(&path);
8993                 /*
8994                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8995                  * we need to record it for inode/file extent rebuild.
8996                  * For ret > 0, we record it only for file extent rebuild.
8997                  * For ret == 0, the file extent exists but only bytenr
8998                  * mismatch, let the original bytenr fix routine to handle,
8999                  * don't record it.
9000                  */
9001                 if (ret == 0)
9002                         continue;
9003                 ret = 0;
9004                 orphan = malloc(sizeof(*orphan));
9005                 if (!orphan) {
9006                         ret = -ENOMEM;
9007                         goto out;
9008                 }
9009                 INIT_LIST_HEAD(&orphan->list);
9010                 orphan->root = dback->root;
9011                 orphan->objectid = dback->owner;
9012                 orphan->offset = dback->offset;
9013                 orphan->disk_bytenr = rec->cache.start;
9014                 orphan->disk_len = rec->cache.size;
9015                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9016                 recorded_data_ref = 1;
9017         }
9018 out:
9019         btrfs_release_path(&path);
9020         if (!ret)
9021                 return !recorded_data_ref;
9022         else
9023                 return ret;
9024 }
9025
9026 /*
9027  * when an incorrect extent item is found, this will delete
9028  * all of the existing entries for it and recreate them
9029  * based on what the tree scan found.
9030  */
9031 static int fixup_extent_refs(struct btrfs_fs_info *info,
9032                              struct cache_tree *extent_cache,
9033                              struct extent_record *rec)
9034 {
9035         struct btrfs_trans_handle *trans = NULL;
9036         int ret;
9037         struct btrfs_path path;
9038         struct list_head *cur = rec->backrefs.next;
9039         struct cache_extent *cache;
9040         struct extent_backref *back;
9041         int allocated = 0;
9042         u64 flags = 0;
9043
9044         if (rec->flag_block_full_backref)
9045                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9046
9047         btrfs_init_path(&path);
9048         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9049                 /*
9050                  * Sometimes the backrefs themselves are so broken they don't
9051                  * get attached to any meaningful rec, so first go back and
9052                  * check any of our backrefs that we couldn't find and throw
9053                  * them into the list if we find the backref so that
9054                  * verify_backrefs can figure out what to do.
9055                  */
9056                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9057                 if (ret < 0)
9058                         goto out;
9059         }
9060
9061         /* step one, make sure all of the backrefs agree */
9062         ret = verify_backrefs(info, &path, rec);
9063         if (ret < 0)
9064                 goto out;
9065
9066         trans = btrfs_start_transaction(info->extent_root, 1);
9067         if (IS_ERR(trans)) {
9068                 ret = PTR_ERR(trans);
9069                 goto out;
9070         }
9071
9072         /* step two, delete all the existing records */
9073         ret = delete_extent_records(trans, info->extent_root, &path,
9074                                     rec->start);
9075
9076         if (ret < 0)
9077                 goto out;
9078
9079         /* was this block corrupt?  If so, don't add references to it */
9080         cache = lookup_cache_extent(info->corrupt_blocks,
9081                                     rec->start, rec->max_size);
9082         if (cache) {
9083                 ret = 0;
9084                 goto out;
9085         }
9086
9087         /* step three, recreate all the refs we did find */
9088         while(cur != &rec->backrefs) {
9089                 back = to_extent_backref(cur);
9090                 cur = cur->next;
9091
9092                 /*
9093                  * if we didn't find any references, don't create a
9094                  * new extent record
9095                  */
9096                 if (!back->found_ref)
9097                         continue;
9098
9099                 rec->bad_full_backref = 0;
9100                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9101                 allocated = 1;
9102
9103                 if (ret)
9104                         goto out;
9105         }
9106 out:
9107         if (trans) {
9108                 int err = btrfs_commit_transaction(trans, info->extent_root);
9109                 if (!ret)
9110                         ret = err;
9111         }
9112
9113         if (!ret)
9114                 fprintf(stderr, "Repaired extent references for %llu\n",
9115                                 (unsigned long long)rec->start);
9116
9117         btrfs_release_path(&path);
9118         return ret;
9119 }
9120
9121 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9122                               struct extent_record *rec)
9123 {
9124         struct btrfs_trans_handle *trans;
9125         struct btrfs_root *root = fs_info->extent_root;
9126         struct btrfs_path path;
9127         struct btrfs_extent_item *ei;
9128         struct btrfs_key key;
9129         u64 flags;
9130         int ret = 0;
9131
9132         key.objectid = rec->start;
9133         if (rec->metadata) {
9134                 key.type = BTRFS_METADATA_ITEM_KEY;
9135                 key.offset = rec->info_level;
9136         } else {
9137                 key.type = BTRFS_EXTENT_ITEM_KEY;
9138                 key.offset = rec->max_size;
9139         }
9140
9141         trans = btrfs_start_transaction(root, 0);
9142         if (IS_ERR(trans))
9143                 return PTR_ERR(trans);
9144
9145         btrfs_init_path(&path);
9146         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9147         if (ret < 0) {
9148                 btrfs_release_path(&path);
9149                 btrfs_commit_transaction(trans, root);
9150                 return ret;
9151         } else if (ret) {
9152                 fprintf(stderr, "Didn't find extent for %llu\n",
9153                         (unsigned long long)rec->start);
9154                 btrfs_release_path(&path);
9155                 btrfs_commit_transaction(trans, root);
9156                 return -ENOENT;
9157         }
9158
9159         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9160                             struct btrfs_extent_item);
9161         flags = btrfs_extent_flags(path.nodes[0], ei);
9162         if (rec->flag_block_full_backref) {
9163                 fprintf(stderr, "setting full backref on %llu\n",
9164                         (unsigned long long)key.objectid);
9165                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9166         } else {
9167                 fprintf(stderr, "clearing full backref on %llu\n",
9168                         (unsigned long long)key.objectid);
9169                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9170         }
9171         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9172         btrfs_mark_buffer_dirty(path.nodes[0]);
9173         btrfs_release_path(&path);
9174         ret = btrfs_commit_transaction(trans, root);
9175         if (!ret)
9176                 fprintf(stderr, "Repaired extent flags for %llu\n",
9177                                 (unsigned long long)rec->start);
9178
9179         return ret;
9180 }
9181
9182 /* right now we only prune from the extent allocation tree */
9183 static int prune_one_block(struct btrfs_trans_handle *trans,
9184                            struct btrfs_fs_info *info,
9185                            struct btrfs_corrupt_block *corrupt)
9186 {
9187         int ret;
9188         struct btrfs_path path;
9189         struct extent_buffer *eb;
9190         u64 found;
9191         int slot;
9192         int nritems;
9193         int level = corrupt->level + 1;
9194
9195         btrfs_init_path(&path);
9196 again:
9197         /* we want to stop at the parent to our busted block */
9198         path.lowest_level = level;
9199
9200         ret = btrfs_search_slot(trans, info->extent_root,
9201                                 &corrupt->key, &path, -1, 1);
9202
9203         if (ret < 0)
9204                 goto out;
9205
9206         eb = path.nodes[level];
9207         if (!eb) {
9208                 ret = -ENOENT;
9209                 goto out;
9210         }
9211
9212         /*
9213          * hopefully the search gave us the block we want to prune,
9214          * lets try that first
9215          */
9216         slot = path.slots[level];
9217         found =  btrfs_node_blockptr(eb, slot);
9218         if (found == corrupt->cache.start)
9219                 goto del_ptr;
9220
9221         nritems = btrfs_header_nritems(eb);
9222
9223         /* the search failed, lets scan this node and hope we find it */
9224         for (slot = 0; slot < nritems; slot++) {
9225                 found =  btrfs_node_blockptr(eb, slot);
9226                 if (found == corrupt->cache.start)
9227                         goto del_ptr;
9228         }
9229         /*
9230          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9231          * to this block
9232          */
9233         if (eb == info->extent_root->node) {
9234                 ret = -ENOENT;
9235                 goto out;
9236         } else {
9237                 level++;
9238                 btrfs_release_path(&path);
9239                 goto again;
9240         }
9241
9242 del_ptr:
9243         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9244         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9245
9246 out:
9247         btrfs_release_path(&path);
9248         return ret;
9249 }
9250
9251 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9252 {
9253         struct btrfs_trans_handle *trans = NULL;
9254         struct cache_extent *cache;
9255         struct btrfs_corrupt_block *corrupt;
9256
9257         while (1) {
9258                 cache = search_cache_extent(info->corrupt_blocks, 0);
9259                 if (!cache)
9260                         break;
9261                 if (!trans) {
9262                         trans = btrfs_start_transaction(info->extent_root, 1);
9263                         if (IS_ERR(trans))
9264                                 return PTR_ERR(trans);
9265                 }
9266                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9267                 prune_one_block(trans, info, corrupt);
9268                 remove_cache_extent(info->corrupt_blocks, cache);
9269         }
9270         if (trans)
9271                 return btrfs_commit_transaction(trans, info->extent_root);
9272         return 0;
9273 }
9274
9275 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9276 {
9277         struct btrfs_block_group_cache *cache;
9278         u64 start, end;
9279         int ret;
9280
9281         while (1) {
9282                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9283                                             &start, &end, EXTENT_DIRTY);
9284                 if (ret)
9285                         break;
9286                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9287         }
9288
9289         start = 0;
9290         while (1) {
9291                 cache = btrfs_lookup_first_block_group(fs_info, start);
9292                 if (!cache)
9293                         break;
9294                 if (cache->cached)
9295                         cache->cached = 0;
9296                 start = cache->key.objectid + cache->key.offset;
9297         }
9298 }
9299
9300 static int check_extent_refs(struct btrfs_root *root,
9301                              struct cache_tree *extent_cache)
9302 {
9303         struct extent_record *rec;
9304         struct cache_extent *cache;
9305         int ret = 0;
9306         int had_dups = 0;
9307
9308         if (repair) {
9309                 /*
9310                  * if we're doing a repair, we have to make sure
9311                  * we don't allocate from the problem extents.
9312                  * In the worst case, this will be all the
9313                  * extents in the FS
9314                  */
9315                 cache = search_cache_extent(extent_cache, 0);
9316                 while(cache) {
9317                         rec = container_of(cache, struct extent_record, cache);
9318                         set_extent_dirty(root->fs_info->excluded_extents,
9319                                          rec->start,
9320                                          rec->start + rec->max_size - 1);
9321                         cache = next_cache_extent(cache);
9322                 }
9323
9324                 /* pin down all the corrupted blocks too */
9325                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9326                 while(cache) {
9327                         set_extent_dirty(root->fs_info->excluded_extents,
9328                                          cache->start,
9329                                          cache->start + cache->size - 1);
9330                         cache = next_cache_extent(cache);
9331                 }
9332                 prune_corrupt_blocks(root->fs_info);
9333                 reset_cached_block_groups(root->fs_info);
9334         }
9335
9336         reset_cached_block_groups(root->fs_info);
9337
9338         /*
9339          * We need to delete any duplicate entries we find first otherwise we
9340          * could mess up the extent tree when we have backrefs that actually
9341          * belong to a different extent item and not the weird duplicate one.
9342          */
9343         while (repair && !list_empty(&duplicate_extents)) {
9344                 rec = to_extent_record(duplicate_extents.next);
9345                 list_del_init(&rec->list);
9346
9347                 /* Sometimes we can find a backref before we find an actual
9348                  * extent, so we need to process it a little bit to see if there
9349                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9350                  * if this is a backref screwup.  If we need to delete stuff
9351                  * process_duplicates() will return 0, otherwise it will return
9352                  * 1 and we
9353                  */
9354                 if (process_duplicates(extent_cache, rec))
9355                         continue;
9356                 ret = delete_duplicate_records(root, rec);
9357                 if (ret < 0)
9358                         return ret;
9359                 /*
9360                  * delete_duplicate_records will return the number of entries
9361                  * deleted, so if it's greater than 0 then we know we actually
9362                  * did something and we need to remove.
9363                  */
9364                 if (ret)
9365                         had_dups = 1;
9366         }
9367
9368         if (had_dups)
9369                 return -EAGAIN;
9370
9371         while(1) {
9372                 int cur_err = 0;
9373                 int fix = 0;
9374
9375                 cache = search_cache_extent(extent_cache, 0);
9376                 if (!cache)
9377                         break;
9378                 rec = container_of(cache, struct extent_record, cache);
9379                 if (rec->num_duplicates) {
9380                         fprintf(stderr, "extent item %llu has multiple extent "
9381                                 "items\n", (unsigned long long)rec->start);
9382                         cur_err = 1;
9383                 }
9384
9385                 if (rec->refs != rec->extent_item_refs) {
9386                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9387                                 (unsigned long long)rec->start,
9388                                 (unsigned long long)rec->nr);
9389                         fprintf(stderr, "extent item %llu, found %llu\n",
9390                                 (unsigned long long)rec->extent_item_refs,
9391                                 (unsigned long long)rec->refs);
9392                         ret = record_orphan_data_extents(root->fs_info, rec);
9393                         if (ret < 0)
9394                                 goto repair_abort;
9395                         fix = ret;
9396                         cur_err = 1;
9397                 }
9398                 if (all_backpointers_checked(rec, 1)) {
9399                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9400                                 (unsigned long long)rec->start,
9401                                 (unsigned long long)rec->nr);
9402                         fix = 1;
9403                         cur_err = 1;
9404                 }
9405                 if (!rec->owner_ref_checked) {
9406                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9407                                 (unsigned long long)rec->start,
9408                                 (unsigned long long)rec->nr);
9409                         fix = 1;
9410                         cur_err = 1;
9411                 }
9412
9413                 if (repair && fix) {
9414                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9415                         if (ret)
9416                                 goto repair_abort;
9417                 }
9418
9419
9420                 if (rec->bad_full_backref) {
9421                         fprintf(stderr, "bad full backref, on [%llu]\n",
9422                                 (unsigned long long)rec->start);
9423                         if (repair) {
9424                                 ret = fixup_extent_flags(root->fs_info, rec);
9425                                 if (ret)
9426                                         goto repair_abort;
9427                                 fix = 1;
9428                         }
9429                         cur_err = 1;
9430                 }
9431                 /*
9432                  * Although it's not a extent ref's problem, we reuse this
9433                  * routine for error reporting.
9434                  * No repair function yet.
9435                  */
9436                 if (rec->crossing_stripes) {
9437                         fprintf(stderr,
9438                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9439                                 rec->start, rec->start + rec->max_size);
9440                         cur_err = 1;
9441                 }
9442
9443                 if (rec->wrong_chunk_type) {
9444                         fprintf(stderr,
9445                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9446                                 rec->start, rec->start + rec->max_size);
9447                         cur_err = 1;
9448                 }
9449
9450                 remove_cache_extent(extent_cache, cache);
9451                 free_all_extent_backrefs(rec);
9452                 if (!init_extent_tree && repair && (!cur_err || fix))
9453                         clear_extent_dirty(root->fs_info->excluded_extents,
9454                                            rec->start,
9455                                            rec->start + rec->max_size - 1);
9456                 free(rec);
9457         }
9458 repair_abort:
9459         if (repair) {
9460                 if (ret && ret != -EAGAIN) {
9461                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9462                         exit(1);
9463                 } else if (!ret) {
9464                         struct btrfs_trans_handle *trans;
9465
9466                         root = root->fs_info->extent_root;
9467                         trans = btrfs_start_transaction(root, 1);
9468                         if (IS_ERR(trans)) {
9469                                 ret = PTR_ERR(trans);
9470                                 goto repair_abort;
9471                         }
9472
9473                         btrfs_fix_block_accounting(trans, root);
9474                         ret = btrfs_commit_transaction(trans, root);
9475                         if (ret)
9476                                 goto repair_abort;
9477                 }
9478                 return ret;
9479         }
9480         return 0;
9481 }
9482
9483 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9484 {
9485         u64 stripe_size;
9486
9487         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9488                 stripe_size = length;
9489                 stripe_size /= num_stripes;
9490         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9491                 stripe_size = length * 2;
9492                 stripe_size /= num_stripes;
9493         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9494                 stripe_size = length;
9495                 stripe_size /= (num_stripes - 1);
9496         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9497                 stripe_size = length;
9498                 stripe_size /= (num_stripes - 2);
9499         } else {
9500                 stripe_size = length;
9501         }
9502         return stripe_size;
9503 }
9504
9505 /*
9506  * Check the chunk with its block group/dev list ref:
9507  * Return 0 if all refs seems valid.
9508  * Return 1 if part of refs seems valid, need later check for rebuild ref
9509  * like missing block group and needs to search extent tree to rebuild them.
9510  * Return -1 if essential refs are missing and unable to rebuild.
9511  */
9512 static int check_chunk_refs(struct chunk_record *chunk_rec,
9513                             struct block_group_tree *block_group_cache,
9514                             struct device_extent_tree *dev_extent_cache,
9515                             int silent)
9516 {
9517         struct cache_extent *block_group_item;
9518         struct block_group_record *block_group_rec;
9519         struct cache_extent *dev_extent_item;
9520         struct device_extent_record *dev_extent_rec;
9521         u64 devid;
9522         u64 offset;
9523         u64 length;
9524         int metadump_v2 = 0;
9525         int i;
9526         int ret = 0;
9527
9528         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9529                                                chunk_rec->offset,
9530                                                chunk_rec->length);
9531         if (block_group_item) {
9532                 block_group_rec = container_of(block_group_item,
9533                                                struct block_group_record,
9534                                                cache);
9535                 if (chunk_rec->length != block_group_rec->offset ||
9536                     chunk_rec->offset != block_group_rec->objectid ||
9537                     (!metadump_v2 &&
9538                      chunk_rec->type_flags != block_group_rec->flags)) {
9539                         if (!silent)
9540                                 fprintf(stderr,
9541                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9542                                         chunk_rec->objectid,
9543                                         chunk_rec->type,
9544                                         chunk_rec->offset,
9545                                         chunk_rec->length,
9546                                         chunk_rec->offset,
9547                                         chunk_rec->type_flags,
9548                                         block_group_rec->objectid,
9549                                         block_group_rec->type,
9550                                         block_group_rec->offset,
9551                                         block_group_rec->offset,
9552                                         block_group_rec->objectid,
9553                                         block_group_rec->flags);
9554                         ret = -1;
9555                 } else {
9556                         list_del_init(&block_group_rec->list);
9557                         chunk_rec->bg_rec = block_group_rec;
9558                 }
9559         } else {
9560                 if (!silent)
9561                         fprintf(stderr,
9562                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9563                                 chunk_rec->objectid,
9564                                 chunk_rec->type,
9565                                 chunk_rec->offset,
9566                                 chunk_rec->length,
9567                                 chunk_rec->offset,
9568                                 chunk_rec->type_flags);
9569                 ret = 1;
9570         }
9571
9572         if (metadump_v2)
9573                 return ret;
9574
9575         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9576                                     chunk_rec->num_stripes);
9577         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9578                 devid = chunk_rec->stripes[i].devid;
9579                 offset = chunk_rec->stripes[i].offset;
9580                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9581                                                        devid, offset, length);
9582                 if (dev_extent_item) {
9583                         dev_extent_rec = container_of(dev_extent_item,
9584                                                 struct device_extent_record,
9585                                                 cache);
9586                         if (dev_extent_rec->objectid != devid ||
9587                             dev_extent_rec->offset != offset ||
9588                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9589                             dev_extent_rec->length != length) {
9590                                 if (!silent)
9591                                         fprintf(stderr,
9592                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9593                                                 chunk_rec->objectid,
9594                                                 chunk_rec->type,
9595                                                 chunk_rec->offset,
9596                                                 chunk_rec->stripes[i].devid,
9597                                                 chunk_rec->stripes[i].offset,
9598                                                 dev_extent_rec->objectid,
9599                                                 dev_extent_rec->offset,
9600                                                 dev_extent_rec->length);
9601                                 ret = -1;
9602                         } else {
9603                                 list_move(&dev_extent_rec->chunk_list,
9604                                           &chunk_rec->dextents);
9605                         }
9606                 } else {
9607                         if (!silent)
9608                                 fprintf(stderr,
9609                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9610                                         chunk_rec->objectid,
9611                                         chunk_rec->type,
9612                                         chunk_rec->offset,
9613                                         chunk_rec->stripes[i].devid,
9614                                         chunk_rec->stripes[i].offset);
9615                         ret = -1;
9616                 }
9617         }
9618         return ret;
9619 }
9620
9621 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9622 int check_chunks(struct cache_tree *chunk_cache,
9623                  struct block_group_tree *block_group_cache,
9624                  struct device_extent_tree *dev_extent_cache,
9625                  struct list_head *good, struct list_head *bad,
9626                  struct list_head *rebuild, int silent)
9627 {
9628         struct cache_extent *chunk_item;
9629         struct chunk_record *chunk_rec;
9630         struct block_group_record *bg_rec;
9631         struct device_extent_record *dext_rec;
9632         int err;
9633         int ret = 0;
9634
9635         chunk_item = first_cache_extent(chunk_cache);
9636         while (chunk_item) {
9637                 chunk_rec = container_of(chunk_item, struct chunk_record,
9638                                          cache);
9639                 err = check_chunk_refs(chunk_rec, block_group_cache,
9640                                        dev_extent_cache, silent);
9641                 if (err < 0)
9642                         ret = err;
9643                 if (err == 0 && good)
9644                         list_add_tail(&chunk_rec->list, good);
9645                 if (err > 0 && rebuild)
9646                         list_add_tail(&chunk_rec->list, rebuild);
9647                 if (err < 0 && bad)
9648                         list_add_tail(&chunk_rec->list, bad);
9649                 chunk_item = next_cache_extent(chunk_item);
9650         }
9651
9652         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9653                 if (!silent)
9654                         fprintf(stderr,
9655                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9656                                 bg_rec->objectid,
9657                                 bg_rec->offset,
9658                                 bg_rec->flags);
9659                 if (!ret)
9660                         ret = 1;
9661         }
9662
9663         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9664                             chunk_list) {
9665                 if (!silent)
9666                         fprintf(stderr,
9667                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9668                                 dext_rec->objectid,
9669                                 dext_rec->offset,
9670                                 dext_rec->length);
9671                 if (!ret)
9672                         ret = 1;
9673         }
9674         return ret;
9675 }
9676
9677
9678 static int check_device_used(struct device_record *dev_rec,
9679                              struct device_extent_tree *dext_cache)
9680 {
9681         struct cache_extent *cache;
9682         struct device_extent_record *dev_extent_rec;
9683         u64 total_byte = 0;
9684
9685         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9686         while (cache) {
9687                 dev_extent_rec = container_of(cache,
9688                                               struct device_extent_record,
9689                                               cache);
9690                 if (dev_extent_rec->objectid != dev_rec->devid)
9691                         break;
9692
9693                 list_del_init(&dev_extent_rec->device_list);
9694                 total_byte += dev_extent_rec->length;
9695                 cache = next_cache_extent(cache);
9696         }
9697
9698         if (total_byte != dev_rec->byte_used) {
9699                 fprintf(stderr,
9700                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9701                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9702                         dev_rec->type, dev_rec->offset);
9703                 return -1;
9704         } else {
9705                 return 0;
9706         }
9707 }
9708
9709 /* check btrfs_dev_item -> btrfs_dev_extent */
9710 static int check_devices(struct rb_root *dev_cache,
9711                          struct device_extent_tree *dev_extent_cache)
9712 {
9713         struct rb_node *dev_node;
9714         struct device_record *dev_rec;
9715         struct device_extent_record *dext_rec;
9716         int err;
9717         int ret = 0;
9718
9719         dev_node = rb_first(dev_cache);
9720         while (dev_node) {
9721                 dev_rec = container_of(dev_node, struct device_record, node);
9722                 err = check_device_used(dev_rec, dev_extent_cache);
9723                 if (err)
9724                         ret = err;
9725
9726                 dev_node = rb_next(dev_node);
9727         }
9728         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9729                             device_list) {
9730                 fprintf(stderr,
9731                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9732                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9733                 if (!ret)
9734                         ret = 1;
9735         }
9736         return ret;
9737 }
9738
9739 static int add_root_item_to_list(struct list_head *head,
9740                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9741                                   u8 level, u8 drop_level,
9742                                   int level_size, struct btrfs_key *drop_key)
9743 {
9744
9745         struct root_item_record *ri_rec;
9746         ri_rec = malloc(sizeof(*ri_rec));
9747         if (!ri_rec)
9748                 return -ENOMEM;
9749         ri_rec->bytenr = bytenr;
9750         ri_rec->objectid = objectid;
9751         ri_rec->level = level;
9752         ri_rec->level_size = level_size;
9753         ri_rec->drop_level = drop_level;
9754         ri_rec->last_snapshot = last_snapshot;
9755         if (drop_key)
9756                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9757         list_add_tail(&ri_rec->list, head);
9758
9759         return 0;
9760 }
9761
9762 static void free_root_item_list(struct list_head *list)
9763 {
9764         struct root_item_record *ri_rec;
9765
9766         while (!list_empty(list)) {
9767                 ri_rec = list_first_entry(list, struct root_item_record,
9768                                           list);
9769                 list_del_init(&ri_rec->list);
9770                 free(ri_rec);
9771         }
9772 }
9773
9774 static int deal_root_from_list(struct list_head *list,
9775                                struct btrfs_root *root,
9776                                struct block_info *bits,
9777                                int bits_nr,
9778                                struct cache_tree *pending,
9779                                struct cache_tree *seen,
9780                                struct cache_tree *reada,
9781                                struct cache_tree *nodes,
9782                                struct cache_tree *extent_cache,
9783                                struct cache_tree *chunk_cache,
9784                                struct rb_root *dev_cache,
9785                                struct block_group_tree *block_group_cache,
9786                                struct device_extent_tree *dev_extent_cache)
9787 {
9788         int ret = 0;
9789         u64 last;
9790
9791         while (!list_empty(list)) {
9792                 struct root_item_record *rec;
9793                 struct extent_buffer *buf;
9794                 rec = list_entry(list->next,
9795                                  struct root_item_record, list);
9796                 last = 0;
9797                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9798                 if (!extent_buffer_uptodate(buf)) {
9799                         free_extent_buffer(buf);
9800                         ret = -EIO;
9801                         break;
9802                 }
9803                 ret = add_root_to_pending(buf, extent_cache, pending,
9804                                     seen, nodes, rec->objectid);
9805                 if (ret < 0)
9806                         break;
9807                 /*
9808                  * To rebuild extent tree, we need deal with snapshot
9809                  * one by one, otherwise we deal with node firstly which
9810                  * can maximize readahead.
9811                  */
9812                 while (1) {
9813                         ret = run_next_block(root, bits, bits_nr, &last,
9814                                              pending, seen, reada, nodes,
9815                                              extent_cache, chunk_cache,
9816                                              dev_cache, block_group_cache,
9817                                              dev_extent_cache, rec);
9818                         if (ret != 0)
9819                                 break;
9820                 }
9821                 free_extent_buffer(buf);
9822                 list_del(&rec->list);
9823                 free(rec);
9824                 if (ret < 0)
9825                         break;
9826         }
9827         while (ret >= 0) {
9828                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9829                                      reada, nodes, extent_cache, chunk_cache,
9830                                      dev_cache, block_group_cache,
9831                                      dev_extent_cache, NULL);
9832                 if (ret != 0) {
9833                         if (ret > 0)
9834                                 ret = 0;
9835                         break;
9836                 }
9837         }
9838         return ret;
9839 }
9840
9841 static int check_chunks_and_extents(struct btrfs_root *root)
9842 {
9843         struct rb_root dev_cache;
9844         struct cache_tree chunk_cache;
9845         struct block_group_tree block_group_cache;
9846         struct device_extent_tree dev_extent_cache;
9847         struct cache_tree extent_cache;
9848         struct cache_tree seen;
9849         struct cache_tree pending;
9850         struct cache_tree reada;
9851         struct cache_tree nodes;
9852         struct extent_io_tree excluded_extents;
9853         struct cache_tree corrupt_blocks;
9854         struct btrfs_path path;
9855         struct btrfs_key key;
9856         struct btrfs_key found_key;
9857         int ret, err = 0;
9858         struct block_info *bits;
9859         int bits_nr;
9860         struct extent_buffer *leaf;
9861         int slot;
9862         struct btrfs_root_item ri;
9863         struct list_head dropping_trees;
9864         struct list_head normal_trees;
9865         struct btrfs_root *root1;
9866         u64 objectid;
9867         u32 level_size;
9868         u8 level;
9869
9870         dev_cache = RB_ROOT;
9871         cache_tree_init(&chunk_cache);
9872         block_group_tree_init(&block_group_cache);
9873         device_extent_tree_init(&dev_extent_cache);
9874
9875         cache_tree_init(&extent_cache);
9876         cache_tree_init(&seen);
9877         cache_tree_init(&pending);
9878         cache_tree_init(&nodes);
9879         cache_tree_init(&reada);
9880         cache_tree_init(&corrupt_blocks);
9881         extent_io_tree_init(&excluded_extents);
9882         INIT_LIST_HEAD(&dropping_trees);
9883         INIT_LIST_HEAD(&normal_trees);
9884
9885         if (repair) {
9886                 root->fs_info->excluded_extents = &excluded_extents;
9887                 root->fs_info->fsck_extent_cache = &extent_cache;
9888                 root->fs_info->free_extent_hook = free_extent_hook;
9889                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9890         }
9891
9892         bits_nr = 1024;
9893         bits = malloc(bits_nr * sizeof(struct block_info));
9894         if (!bits) {
9895                 perror("malloc");
9896                 exit(1);
9897         }
9898
9899         if (ctx.progress_enabled) {
9900                 ctx.tp = TASK_EXTENTS;
9901                 task_start(ctx.info);
9902         }
9903
9904 again:
9905         root1 = root->fs_info->tree_root;
9906         level = btrfs_header_level(root1->node);
9907         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9908                                     root1->node->start, 0, level, 0,
9909                                     root1->fs_info->nodesize, NULL);
9910         if (ret < 0)
9911                 goto out;
9912         root1 = root->fs_info->chunk_root;
9913         level = btrfs_header_level(root1->node);
9914         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9915                                     root1->node->start, 0, level, 0,
9916                                     root1->fs_info->nodesize, NULL);
9917         if (ret < 0)
9918                 goto out;
9919         btrfs_init_path(&path);
9920         key.offset = 0;
9921         key.objectid = 0;
9922         key.type = BTRFS_ROOT_ITEM_KEY;
9923         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9924                                         &key, &path, 0, 0);
9925         if (ret < 0)
9926                 goto out;
9927         while(1) {
9928                 leaf = path.nodes[0];
9929                 slot = path.slots[0];
9930                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9931                         ret = btrfs_next_leaf(root, &path);
9932                         if (ret != 0)
9933                                 break;
9934                         leaf = path.nodes[0];
9935                         slot = path.slots[0];
9936                 }
9937                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9938                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9939                         unsigned long offset;
9940                         u64 last_snapshot;
9941
9942                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9943                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9944                         last_snapshot = btrfs_root_last_snapshot(&ri);
9945                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9946                                 level = btrfs_root_level(&ri);
9947                                 level_size = root->fs_info->nodesize;
9948                                 ret = add_root_item_to_list(&normal_trees,
9949                                                 found_key.objectid,
9950                                                 btrfs_root_bytenr(&ri),
9951                                                 last_snapshot, level,
9952                                                 0, level_size, NULL);
9953                                 if (ret < 0)
9954                                         goto out;
9955                         } else {
9956                                 level = btrfs_root_level(&ri);
9957                                 level_size = root->fs_info->nodesize;
9958                                 objectid = found_key.objectid;
9959                                 btrfs_disk_key_to_cpu(&found_key,
9960                                                       &ri.drop_progress);
9961                                 ret = add_root_item_to_list(&dropping_trees,
9962                                                 objectid,
9963                                                 btrfs_root_bytenr(&ri),
9964                                                 last_snapshot, level,
9965                                                 ri.drop_level,
9966                                                 level_size, &found_key);
9967                                 if (ret < 0)
9968                                         goto out;
9969                         }
9970                 }
9971                 path.slots[0]++;
9972         }
9973         btrfs_release_path(&path);
9974
9975         /*
9976          * check_block can return -EAGAIN if it fixes something, please keep
9977          * this in mind when dealing with return values from these functions, if
9978          * we get -EAGAIN we want to fall through and restart the loop.
9979          */
9980         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9981                                   &seen, &reada, &nodes, &extent_cache,
9982                                   &chunk_cache, &dev_cache, &block_group_cache,
9983                                   &dev_extent_cache);
9984         if (ret < 0) {
9985                 if (ret == -EAGAIN)
9986                         goto loop;
9987                 goto out;
9988         }
9989         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9990                                   &pending, &seen, &reada, &nodes,
9991                                   &extent_cache, &chunk_cache, &dev_cache,
9992                                   &block_group_cache, &dev_extent_cache);
9993         if (ret < 0) {
9994                 if (ret == -EAGAIN)
9995                         goto loop;
9996                 goto out;
9997         }
9998
9999         ret = check_chunks(&chunk_cache, &block_group_cache,
10000                            &dev_extent_cache, NULL, NULL, NULL, 0);
10001         if (ret) {
10002                 if (ret == -EAGAIN)
10003                         goto loop;
10004                 err = ret;
10005         }
10006
10007         ret = check_extent_refs(root, &extent_cache);
10008         if (ret < 0) {
10009                 if (ret == -EAGAIN)
10010                         goto loop;
10011                 goto out;
10012         }
10013
10014         ret = check_devices(&dev_cache, &dev_extent_cache);
10015         if (ret && err)
10016                 ret = err;
10017
10018 out:
10019         task_stop(ctx.info);
10020         if (repair) {
10021                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10022                 extent_io_tree_cleanup(&excluded_extents);
10023                 root->fs_info->fsck_extent_cache = NULL;
10024                 root->fs_info->free_extent_hook = NULL;
10025                 root->fs_info->corrupt_blocks = NULL;
10026                 root->fs_info->excluded_extents = NULL;
10027         }
10028         free(bits);
10029         free_chunk_cache_tree(&chunk_cache);
10030         free_device_cache_tree(&dev_cache);
10031         free_block_group_tree(&block_group_cache);
10032         free_device_extent_tree(&dev_extent_cache);
10033         free_extent_cache_tree(&seen);
10034         free_extent_cache_tree(&pending);
10035         free_extent_cache_tree(&reada);
10036         free_extent_cache_tree(&nodes);
10037         free_root_item_list(&normal_trees);
10038         free_root_item_list(&dropping_trees);
10039         return ret;
10040 loop:
10041         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10042         free_extent_cache_tree(&seen);
10043         free_extent_cache_tree(&pending);
10044         free_extent_cache_tree(&reada);
10045         free_extent_cache_tree(&nodes);
10046         free_chunk_cache_tree(&chunk_cache);
10047         free_block_group_tree(&block_group_cache);
10048         free_device_cache_tree(&dev_cache);
10049         free_device_extent_tree(&dev_extent_cache);
10050         free_extent_record_cache(&extent_cache);
10051         free_root_item_list(&normal_trees);
10052         free_root_item_list(&dropping_trees);
10053         extent_io_tree_cleanup(&excluded_extents);
10054         goto again;
10055 }
10056
10057 /*
10058  * Check backrefs of a tree block given by @bytenr or @eb.
10059  *
10060  * @root:       the root containing the @bytenr or @eb
10061  * @eb:         tree block extent buffer, can be NULL
10062  * @bytenr:     bytenr of the tree block to search
10063  * @level:      tree level of the tree block
10064  * @owner:      owner of the tree block
10065  *
10066  * Return >0 for any error found and output error message
10067  * Return 0 for no error found
10068  */
10069 static int check_tree_block_ref(struct btrfs_root *root,
10070                                 struct extent_buffer *eb, u64 bytenr,
10071                                 int level, u64 owner)
10072 {
10073         struct btrfs_key key;
10074         struct btrfs_root *extent_root = root->fs_info->extent_root;
10075         struct btrfs_path path;
10076         struct btrfs_extent_item *ei;
10077         struct btrfs_extent_inline_ref *iref;
10078         struct extent_buffer *leaf;
10079         unsigned long end;
10080         unsigned long ptr;
10081         int slot;
10082         int skinny_level;
10083         int type;
10084         u32 nodesize = root->fs_info->nodesize;
10085         u32 item_size;
10086         u64 offset;
10087         int tree_reloc_root = 0;
10088         int found_ref = 0;
10089         int err = 0;
10090         int ret;
10091
10092         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10093             btrfs_header_bytenr(root->node) == bytenr)
10094                 tree_reloc_root = 1;
10095
10096         btrfs_init_path(&path);
10097         key.objectid = bytenr;
10098         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10099                 key.type = BTRFS_METADATA_ITEM_KEY;
10100         else
10101                 key.type = BTRFS_EXTENT_ITEM_KEY;
10102         key.offset = (u64)-1;
10103
10104         /* Search for the backref in extent tree */
10105         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10106         if (ret < 0) {
10107                 err |= BACKREF_MISSING;
10108                 goto out;
10109         }
10110         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10111         if (ret) {
10112                 err |= BACKREF_MISSING;
10113                 goto out;
10114         }
10115
10116         leaf = path.nodes[0];
10117         slot = path.slots[0];
10118         btrfs_item_key_to_cpu(leaf, &key, slot);
10119
10120         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10121
10122         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10123                 skinny_level = (int)key.offset;
10124                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10125         } else {
10126                 struct btrfs_tree_block_info *info;
10127
10128                 info = (struct btrfs_tree_block_info *)(ei + 1);
10129                 skinny_level = btrfs_tree_block_level(leaf, info);
10130                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10131         }
10132
10133         if (eb) {
10134                 u64 header_gen;
10135                 u64 extent_gen;
10136
10137                 if (!(btrfs_extent_flags(leaf, ei) &
10138                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10139                         error(
10140                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10141                                 key.objectid, nodesize,
10142                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10143                         err = BACKREF_MISMATCH;
10144                 }
10145                 header_gen = btrfs_header_generation(eb);
10146                 extent_gen = btrfs_extent_generation(leaf, ei);
10147                 if (header_gen != extent_gen) {
10148                         error(
10149         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10150                                 key.objectid, nodesize, header_gen,
10151                                 extent_gen);
10152                         err = BACKREF_MISMATCH;
10153                 }
10154                 if (level != skinny_level) {
10155                         error(
10156                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10157                                 key.objectid, nodesize, level, skinny_level);
10158                         err = BACKREF_MISMATCH;
10159                 }
10160                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10161                         error(
10162                         "extent[%llu %u] is referred by other roots than %llu",
10163                                 key.objectid, nodesize, root->objectid);
10164                         err = BACKREF_MISMATCH;
10165                 }
10166         }
10167
10168         /*
10169          * Iterate the extent/metadata item to find the exact backref
10170          */
10171         item_size = btrfs_item_size_nr(leaf, slot);
10172         ptr = (unsigned long)iref;
10173         end = (unsigned long)ei + item_size;
10174         while (ptr < end) {
10175                 iref = (struct btrfs_extent_inline_ref *)ptr;
10176                 type = btrfs_extent_inline_ref_type(leaf, iref);
10177                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10178
10179                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10180                         (offset == root->objectid || offset == owner)) {
10181                         found_ref = 1;
10182                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10183                         /*
10184                          * Backref of tree reloc root points to itself, no need
10185                          * to check backref any more.
10186                          */
10187                         if (tree_reloc_root)
10188                                 found_ref = 1;
10189                         else
10190                         /* Check if the backref points to valid referencer */
10191                                 found_ref = !check_tree_block_ref(root, NULL,
10192                                                 offset, level + 1, owner);
10193                 }
10194
10195                 if (found_ref)
10196                         break;
10197                 ptr += btrfs_extent_inline_ref_size(type);
10198         }
10199
10200         /*
10201          * Inlined extent item doesn't have what we need, check
10202          * TREE_BLOCK_REF_KEY
10203          */
10204         if (!found_ref) {
10205                 btrfs_release_path(&path);
10206                 key.objectid = bytenr;
10207                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10208                 key.offset = root->objectid;
10209
10210                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10211                 if (!ret)
10212                         found_ref = 1;
10213         }
10214         if (!found_ref)
10215                 err |= BACKREF_MISSING;
10216 out:
10217         btrfs_release_path(&path);
10218         if (eb && (err & BACKREF_MISSING))
10219                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10220                         bytenr, nodesize, owner, level);
10221         return err;
10222 }
10223
10224 /*
10225  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10226  *
10227  * Return >0 any error found and output error message
10228  * Return 0 for no error found
10229  */
10230 static int check_extent_data_item(struct btrfs_root *root,
10231                                   struct extent_buffer *eb, int slot)
10232 {
10233         struct btrfs_file_extent_item *fi;
10234         struct btrfs_path path;
10235         struct btrfs_root *extent_root = root->fs_info->extent_root;
10236         struct btrfs_key fi_key;
10237         struct btrfs_key dbref_key;
10238         struct extent_buffer *leaf;
10239         struct btrfs_extent_item *ei;
10240         struct btrfs_extent_inline_ref *iref;
10241         struct btrfs_extent_data_ref *dref;
10242         u64 owner;
10243         u64 disk_bytenr;
10244         u64 disk_num_bytes;
10245         u64 extent_num_bytes;
10246         u64 extent_flags;
10247         u32 item_size;
10248         unsigned long end;
10249         unsigned long ptr;
10250         int type;
10251         u64 ref_root;
10252         int found_dbackref = 0;
10253         int err = 0;
10254         int ret;
10255
10256         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10257         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10258
10259         /* Nothing to check for hole and inline data extents */
10260         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10261             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10262                 return 0;
10263
10264         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10265         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10266         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10267
10268         /* Check unaligned disk_num_bytes and num_bytes */
10269         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10270                 error(
10271 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10272                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10273                         root->fs_info->sectorsize);
10274                 err |= BYTES_UNALIGNED;
10275         } else {
10276                 data_bytes_allocated += disk_num_bytes;
10277         }
10278         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10279                 error(
10280 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10281                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10282                         root->fs_info->sectorsize);
10283                 err |= BYTES_UNALIGNED;
10284         } else {
10285                 data_bytes_referenced += extent_num_bytes;
10286         }
10287         owner = btrfs_header_owner(eb);
10288
10289         /* Check the extent item of the file extent in extent tree */
10290         btrfs_init_path(&path);
10291         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10292         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10293         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10294
10295         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10296         if (ret)
10297                 goto out;
10298
10299         leaf = path.nodes[0];
10300         slot = path.slots[0];
10301         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10302
10303         extent_flags = btrfs_extent_flags(leaf, ei);
10304
10305         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10306                 error(
10307                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10308                     disk_bytenr, disk_num_bytes,
10309                     BTRFS_EXTENT_FLAG_DATA);
10310                 err |= BACKREF_MISMATCH;
10311         }
10312
10313         /* Check data backref inside that extent item */
10314         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10315         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10316         ptr = (unsigned long)iref;
10317         end = (unsigned long)ei + item_size;
10318         while (ptr < end) {
10319                 iref = (struct btrfs_extent_inline_ref *)ptr;
10320                 type = btrfs_extent_inline_ref_type(leaf, iref);
10321                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10322
10323                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10324                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10325                         if (ref_root == owner || ref_root == root->objectid)
10326                                 found_dbackref = 1;
10327                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10328                         found_dbackref = !check_tree_block_ref(root, NULL,
10329                                 btrfs_extent_inline_ref_offset(leaf, iref),
10330                                 0, owner);
10331                 }
10332
10333                 if (found_dbackref)
10334                         break;
10335                 ptr += btrfs_extent_inline_ref_size(type);
10336         }
10337
10338         if (!found_dbackref) {
10339                 btrfs_release_path(&path);
10340
10341                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10342                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10343                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10344                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10345                                 fi_key.objectid, fi_key.offset);
10346
10347                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10348                                         &dbref_key, &path, 0, 0);
10349                 if (!ret) {
10350                         found_dbackref = 1;
10351                         goto out;
10352                 }
10353
10354                 btrfs_release_path(&path);
10355
10356                 /*
10357                  * Neither inlined nor EXTENT_DATA_REF found, try
10358                  * SHARED_DATA_REF as last chance.
10359                  */
10360                 dbref_key.objectid = disk_bytenr;
10361                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10362                 dbref_key.offset = eb->start;
10363
10364                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10365                                         &dbref_key, &path, 0, 0);
10366                 if (!ret) {
10367                         found_dbackref = 1;
10368                         goto out;
10369                 }
10370         }
10371
10372 out:
10373         if (!found_dbackref)
10374                 err |= BACKREF_MISSING;
10375         btrfs_release_path(&path);
10376         if (err & BACKREF_MISSING) {
10377                 error("data extent[%llu %llu] backref lost",
10378                       disk_bytenr, disk_num_bytes);
10379         }
10380         return err;
10381 }
10382
10383 /*
10384  * Get real tree block level for the case like shared block
10385  * Return >= 0 as tree level
10386  * Return <0 for error
10387  */
10388 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10389 {
10390         struct extent_buffer *eb;
10391         struct btrfs_path path;
10392         struct btrfs_key key;
10393         struct btrfs_extent_item *ei;
10394         u64 flags;
10395         u64 transid;
10396         u8 backref_level;
10397         u8 header_level;
10398         int ret;
10399
10400         /* Search extent tree for extent generation and level */
10401         key.objectid = bytenr;
10402         key.type = BTRFS_METADATA_ITEM_KEY;
10403         key.offset = (u64)-1;
10404
10405         btrfs_init_path(&path);
10406         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10407         if (ret < 0)
10408                 goto release_out;
10409         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10410         if (ret < 0)
10411                 goto release_out;
10412         if (ret > 0) {
10413                 ret = -ENOENT;
10414                 goto release_out;
10415         }
10416
10417         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10418         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10419                             struct btrfs_extent_item);
10420         flags = btrfs_extent_flags(path.nodes[0], ei);
10421         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10422                 ret = -ENOENT;
10423                 goto release_out;
10424         }
10425
10426         /* Get transid for later read_tree_block() check */
10427         transid = btrfs_extent_generation(path.nodes[0], ei);
10428
10429         /* Get backref level as one source */
10430         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10431                 backref_level = key.offset;
10432         } else {
10433                 struct btrfs_tree_block_info *info;
10434
10435                 info = (struct btrfs_tree_block_info *)(ei + 1);
10436                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10437         }
10438         btrfs_release_path(&path);
10439
10440         /* Get level from tree block as an alternative source */
10441         eb = read_tree_block(fs_info, bytenr, transid);
10442         if (!extent_buffer_uptodate(eb)) {
10443                 free_extent_buffer(eb);
10444                 return -EIO;
10445         }
10446         header_level = btrfs_header_level(eb);
10447         free_extent_buffer(eb);
10448
10449         if (header_level != backref_level)
10450                 return -EIO;
10451         return header_level;
10452
10453 release_out:
10454         btrfs_release_path(&path);
10455         return ret;
10456 }
10457
10458 /*
10459  * Check if a tree block backref is valid (points to a valid tree block)
10460  * if level == -1, level will be resolved
10461  * Return >0 for any error found and print error message
10462  */
10463 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10464                                     u64 bytenr, int level)
10465 {
10466         struct btrfs_root *root;
10467         struct btrfs_key key;
10468         struct btrfs_path path;
10469         struct extent_buffer *eb;
10470         struct extent_buffer *node;
10471         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10472         int err = 0;
10473         int ret;
10474
10475         /* Query level for level == -1 special case */
10476         if (level == -1)
10477                 level = query_tree_block_level(fs_info, bytenr);
10478         if (level < 0) {
10479                 err |= REFERENCER_MISSING;
10480                 goto out;
10481         }
10482
10483         key.objectid = root_id;
10484         key.type = BTRFS_ROOT_ITEM_KEY;
10485         key.offset = (u64)-1;
10486
10487         root = btrfs_read_fs_root(fs_info, &key);
10488         if (IS_ERR(root)) {
10489                 err |= REFERENCER_MISSING;
10490                 goto out;
10491         }
10492
10493         /* Read out the tree block to get item/node key */
10494         eb = read_tree_block(fs_info, bytenr, 0);
10495         if (!extent_buffer_uptodate(eb)) {
10496                 err |= REFERENCER_MISSING;
10497                 free_extent_buffer(eb);
10498                 goto out;
10499         }
10500
10501         /* Empty tree, no need to check key */
10502         if (!btrfs_header_nritems(eb) && !level) {
10503                 free_extent_buffer(eb);
10504                 goto out;
10505         }
10506
10507         if (level)
10508                 btrfs_node_key_to_cpu(eb, &key, 0);
10509         else
10510                 btrfs_item_key_to_cpu(eb, &key, 0);
10511
10512         free_extent_buffer(eb);
10513
10514         btrfs_init_path(&path);
10515         path.lowest_level = level;
10516         /* Search with the first key, to ensure we can reach it */
10517         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10518         if (ret < 0) {
10519                 err |= REFERENCER_MISSING;
10520                 goto release_out;
10521         }
10522
10523         node = path.nodes[level];
10524         if (btrfs_header_bytenr(node) != bytenr) {
10525                 error(
10526         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10527                         bytenr, nodesize, bytenr,
10528                         btrfs_header_bytenr(node));
10529                 err |= REFERENCER_MISMATCH;
10530         }
10531         if (btrfs_header_level(node) != level) {
10532                 error(
10533         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10534                         bytenr, nodesize, level,
10535                         btrfs_header_level(node));
10536                 err |= REFERENCER_MISMATCH;
10537         }
10538
10539 release_out:
10540         btrfs_release_path(&path);
10541 out:
10542         if (err & REFERENCER_MISSING) {
10543                 if (level < 0)
10544                         error("extent [%llu %d] lost referencer (owner: %llu)",
10545                                 bytenr, nodesize, root_id);
10546                 else
10547                         error(
10548                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10549                                 bytenr, nodesize, root_id, level);
10550         }
10551
10552         return err;
10553 }
10554
10555 /*
10556  * Check if tree block @eb is tree reloc root.
10557  * Return 0 if it's not or any problem happens
10558  * Return 1 if it's a tree reloc root
10559  */
10560 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10561                                  struct extent_buffer *eb)
10562 {
10563         struct btrfs_root *tree_reloc_root;
10564         struct btrfs_key key;
10565         u64 bytenr = btrfs_header_bytenr(eb);
10566         u64 owner = btrfs_header_owner(eb);
10567         int ret = 0;
10568
10569         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10570         key.offset = owner;
10571         key.type = BTRFS_ROOT_ITEM_KEY;
10572
10573         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10574         if (IS_ERR(tree_reloc_root))
10575                 return 0;
10576
10577         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10578                 ret = 1;
10579         btrfs_free_fs_root(tree_reloc_root);
10580         return ret;
10581 }
10582
10583 /*
10584  * Check referencer for shared block backref
10585  * If level == -1, this function will resolve the level.
10586  */
10587 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10588                                      u64 parent, u64 bytenr, int level)
10589 {
10590         struct extent_buffer *eb;
10591         u32 nr;
10592         int found_parent = 0;
10593         int i;
10594
10595         eb = read_tree_block(fs_info, parent, 0);
10596         if (!extent_buffer_uptodate(eb))
10597                 goto out;
10598
10599         if (level == -1)
10600                 level = query_tree_block_level(fs_info, bytenr);
10601         if (level < 0)
10602                 goto out;
10603
10604         /* It's possible it's a tree reloc root */
10605         if (parent == bytenr) {
10606                 if (is_tree_reloc_root(fs_info, eb))
10607                         found_parent = 1;
10608                 goto out;
10609         }
10610
10611         if (level + 1 != btrfs_header_level(eb))
10612                 goto out;
10613
10614         nr = btrfs_header_nritems(eb);
10615         for (i = 0; i < nr; i++) {
10616                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10617                         found_parent = 1;
10618                         break;
10619                 }
10620         }
10621 out:
10622         free_extent_buffer(eb);
10623         if (!found_parent) {
10624                 error(
10625         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10626                         bytenr, fs_info->nodesize, parent, level);
10627                 return REFERENCER_MISSING;
10628         }
10629         return 0;
10630 }
10631
10632 /*
10633  * Check referencer for normal (inlined) data ref
10634  * If len == 0, it will be resolved by searching in extent tree
10635  */
10636 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10637                                      u64 root_id, u64 objectid, u64 offset,
10638                                      u64 bytenr, u64 len, u32 count)
10639 {
10640         struct btrfs_root *root;
10641         struct btrfs_root *extent_root = fs_info->extent_root;
10642         struct btrfs_key key;
10643         struct btrfs_path path;
10644         struct extent_buffer *leaf;
10645         struct btrfs_file_extent_item *fi;
10646         u32 found_count = 0;
10647         int slot;
10648         int ret = 0;
10649
10650         if (!len) {
10651                 key.objectid = bytenr;
10652                 key.type = BTRFS_EXTENT_ITEM_KEY;
10653                 key.offset = (u64)-1;
10654
10655                 btrfs_init_path(&path);
10656                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10657                 if (ret < 0)
10658                         goto out;
10659                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10660                 if (ret)
10661                         goto out;
10662                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10663                 if (key.objectid != bytenr ||
10664                     key.type != BTRFS_EXTENT_ITEM_KEY)
10665                         goto out;
10666                 len = key.offset;
10667                 btrfs_release_path(&path);
10668         }
10669         key.objectid = root_id;
10670         key.type = BTRFS_ROOT_ITEM_KEY;
10671         key.offset = (u64)-1;
10672         btrfs_init_path(&path);
10673
10674         root = btrfs_read_fs_root(fs_info, &key);
10675         if (IS_ERR(root))
10676                 goto out;
10677
10678         key.objectid = objectid;
10679         key.type = BTRFS_EXTENT_DATA_KEY;
10680         /*
10681          * It can be nasty as data backref offset is
10682          * file offset - file extent offset, which is smaller or
10683          * equal to original backref offset.  The only special case is
10684          * overflow.  So we need to special check and do further search.
10685          */
10686         key.offset = offset & (1ULL << 63) ? 0 : offset;
10687
10688         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10689         if (ret < 0)
10690                 goto out;
10691
10692         /*
10693          * Search afterwards to get correct one
10694          * NOTE: As we must do a comprehensive check on the data backref to
10695          * make sure the dref count also matches, we must iterate all file
10696          * extents for that inode.
10697          */
10698         while (1) {
10699                 leaf = path.nodes[0];
10700                 slot = path.slots[0];
10701
10702                 if (slot >= btrfs_header_nritems(leaf))
10703                         goto next;
10704                 btrfs_item_key_to_cpu(leaf, &key, slot);
10705                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10706                         break;
10707                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10708                 /*
10709                  * Except normal disk bytenr and disk num bytes, we still
10710                  * need to do extra check on dbackref offset as
10711                  * dbackref offset = file_offset - file_extent_offset
10712                  */
10713                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10714                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10715                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10716                     offset)
10717                         found_count++;
10718
10719 next:
10720                 ret = btrfs_next_item(root, &path);
10721                 if (ret)
10722                         break;
10723         }
10724 out:
10725         btrfs_release_path(&path);
10726         if (found_count != count) {
10727                 error(
10728 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10729                         bytenr, len, root_id, objectid, offset, count, found_count);
10730                 return REFERENCER_MISSING;
10731         }
10732         return 0;
10733 }
10734
10735 /*
10736  * Check if the referencer of a shared data backref exists
10737  */
10738 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10739                                      u64 parent, u64 bytenr)
10740 {
10741         struct extent_buffer *eb;
10742         struct btrfs_key key;
10743         struct btrfs_file_extent_item *fi;
10744         u32 nr;
10745         int found_parent = 0;
10746         int i;
10747
10748         eb = read_tree_block(fs_info, parent, 0);
10749         if (!extent_buffer_uptodate(eb))
10750                 goto out;
10751
10752         nr = btrfs_header_nritems(eb);
10753         for (i = 0; i < nr; i++) {
10754                 btrfs_item_key_to_cpu(eb, &key, i);
10755                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10756                         continue;
10757
10758                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10759                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10760                         continue;
10761
10762                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10763                         found_parent = 1;
10764                         break;
10765                 }
10766         }
10767
10768 out:
10769         free_extent_buffer(eb);
10770         if (!found_parent) {
10771                 error("shared extent %llu referencer lost (parent: %llu)",
10772                         bytenr, parent);
10773                 return REFERENCER_MISSING;
10774         }
10775         return 0;
10776 }
10777
10778 /*
10779  * This function will check a given extent item, including its backref and
10780  * itself (like crossing stripe boundary and type)
10781  *
10782  * Since we don't use extent_record anymore, introduce new error bit
10783  */
10784 static int check_extent_item(struct btrfs_fs_info *fs_info,
10785                              struct extent_buffer *eb, int slot)
10786 {
10787         struct btrfs_extent_item *ei;
10788         struct btrfs_extent_inline_ref *iref;
10789         struct btrfs_extent_data_ref *dref;
10790         unsigned long end;
10791         unsigned long ptr;
10792         int type;
10793         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10794         u32 item_size = btrfs_item_size_nr(eb, slot);
10795         u64 flags;
10796         u64 offset;
10797         int metadata = 0;
10798         int level;
10799         struct btrfs_key key;
10800         int ret;
10801         int err = 0;
10802
10803         btrfs_item_key_to_cpu(eb, &key, slot);
10804         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10805                 bytes_used += key.offset;
10806         else
10807                 bytes_used += nodesize;
10808
10809         if (item_size < sizeof(*ei)) {
10810                 /*
10811                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10812                  * old thing when on disk format is still un-determined.
10813                  * No need to care about it anymore
10814                  */
10815                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10816                 return -ENOTTY;
10817         }
10818
10819         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10820         flags = btrfs_extent_flags(eb, ei);
10821
10822         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10823                 metadata = 1;
10824         if (metadata && check_crossing_stripes(global_info, key.objectid,
10825                                                eb->len)) {
10826                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10827                       key.objectid, key.objectid + nodesize);
10828                 err |= CROSSING_STRIPE_BOUNDARY;
10829         }
10830
10831         ptr = (unsigned long)(ei + 1);
10832
10833         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10834                 /* Old EXTENT_ITEM metadata */
10835                 struct btrfs_tree_block_info *info;
10836
10837                 info = (struct btrfs_tree_block_info *)ptr;
10838                 level = btrfs_tree_block_level(eb, info);
10839                 ptr += sizeof(struct btrfs_tree_block_info);
10840         } else {
10841                 /* New METADATA_ITEM */
10842                 level = key.offset;
10843         }
10844         end = (unsigned long)ei + item_size;
10845
10846 next:
10847         /* Reached extent item end normally */
10848         if (ptr == end)
10849                 goto out;
10850
10851         /* Beyond extent item end, wrong item size */
10852         if (ptr > end) {
10853                 err |= ITEM_SIZE_MISMATCH;
10854                 error("extent item at bytenr %llu slot %d has wrong size",
10855                         eb->start, slot);
10856                 goto out;
10857         }
10858
10859         /* Now check every backref in this extent item */
10860         iref = (struct btrfs_extent_inline_ref *)ptr;
10861         type = btrfs_extent_inline_ref_type(eb, iref);
10862         offset = btrfs_extent_inline_ref_offset(eb, iref);
10863         switch (type) {
10864         case BTRFS_TREE_BLOCK_REF_KEY:
10865                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10866                                                level);
10867                 err |= ret;
10868                 break;
10869         case BTRFS_SHARED_BLOCK_REF_KEY:
10870                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10871                                                  level);
10872                 err |= ret;
10873                 break;
10874         case BTRFS_EXTENT_DATA_REF_KEY:
10875                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10876                 ret = check_extent_data_backref(fs_info,
10877                                 btrfs_extent_data_ref_root(eb, dref),
10878                                 btrfs_extent_data_ref_objectid(eb, dref),
10879                                 btrfs_extent_data_ref_offset(eb, dref),
10880                                 key.objectid, key.offset,
10881                                 btrfs_extent_data_ref_count(eb, dref));
10882                 err |= ret;
10883                 break;
10884         case BTRFS_SHARED_DATA_REF_KEY:
10885                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10886                 err |= ret;
10887                 break;
10888         default:
10889                 error("extent[%llu %d %llu] has unknown ref type: %d",
10890                         key.objectid, key.type, key.offset, type);
10891                 err |= UNKNOWN_TYPE;
10892                 goto out;
10893         }
10894
10895         ptr += btrfs_extent_inline_ref_size(type);
10896         goto next;
10897
10898 out:
10899         return err;
10900 }
10901
10902 /*
10903  * Check if a dev extent item is referred correctly by its chunk
10904  */
10905 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10906                                  struct extent_buffer *eb, int slot)
10907 {
10908         struct btrfs_root *chunk_root = fs_info->chunk_root;
10909         struct btrfs_dev_extent *ptr;
10910         struct btrfs_path path;
10911         struct btrfs_key chunk_key;
10912         struct btrfs_key devext_key;
10913         struct btrfs_chunk *chunk;
10914         struct extent_buffer *l;
10915         int num_stripes;
10916         u64 length;
10917         int i;
10918         int found_chunk = 0;
10919         int ret;
10920
10921         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10922         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10923         length = btrfs_dev_extent_length(eb, ptr);
10924
10925         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10926         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10927         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10928
10929         btrfs_init_path(&path);
10930         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10931         if (ret)
10932                 goto out;
10933
10934         l = path.nodes[0];
10935         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10936         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10937                                       chunk_key.offset);
10938         if (ret < 0)
10939                 goto out;
10940
10941         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10942                 goto out;
10943
10944         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10945         for (i = 0; i < num_stripes; i++) {
10946                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10947                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10948
10949                 if (devid == devext_key.objectid &&
10950                     offset == devext_key.offset) {
10951                         found_chunk = 1;
10952                         break;
10953                 }
10954         }
10955 out:
10956         btrfs_release_path(&path);
10957         if (!found_chunk) {
10958                 error(
10959                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10960                         devext_key.objectid, devext_key.offset, length);
10961                 return REFERENCER_MISSING;
10962         }
10963         return 0;
10964 }
10965
10966 /*
10967  * Check if the used space is correct with the dev item
10968  */
10969 static int check_dev_item(struct btrfs_fs_info *fs_info,
10970                           struct extent_buffer *eb, int slot)
10971 {
10972         struct btrfs_root *dev_root = fs_info->dev_root;
10973         struct btrfs_dev_item *dev_item;
10974         struct btrfs_path path;
10975         struct btrfs_key key;
10976         struct btrfs_dev_extent *ptr;
10977         u64 dev_id;
10978         u64 used;
10979         u64 total = 0;
10980         int ret;
10981
10982         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10983         dev_id = btrfs_device_id(eb, dev_item);
10984         used = btrfs_device_bytes_used(eb, dev_item);
10985
10986         key.objectid = dev_id;
10987         key.type = BTRFS_DEV_EXTENT_KEY;
10988         key.offset = 0;
10989
10990         btrfs_init_path(&path);
10991         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10992         if (ret < 0) {
10993                 btrfs_item_key_to_cpu(eb, &key, slot);
10994                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10995                         key.objectid, key.type, key.offset);
10996                 btrfs_release_path(&path);
10997                 return REFERENCER_MISSING;
10998         }
10999
11000         /* Iterate dev_extents to calculate the used space of a device */
11001         while (1) {
11002                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11003                         goto next;
11004
11005                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11006                 if (key.objectid > dev_id)
11007                         break;
11008                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11009                         goto next;
11010
11011                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11012                                      struct btrfs_dev_extent);
11013                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11014 next:
11015                 ret = btrfs_next_item(dev_root, &path);
11016                 if (ret)
11017                         break;
11018         }
11019         btrfs_release_path(&path);
11020
11021         if (used != total) {
11022                 btrfs_item_key_to_cpu(eb, &key, slot);
11023                 error(
11024 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11025                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11026                         BTRFS_DEV_EXTENT_KEY, dev_id);
11027                 return ACCOUNTING_MISMATCH;
11028         }
11029         return 0;
11030 }
11031
11032 /*
11033  * Check a block group item with its referener (chunk) and its used space
11034  * with extent/metadata item
11035  */
11036 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11037                                   struct extent_buffer *eb, int slot)
11038 {
11039         struct btrfs_root *extent_root = fs_info->extent_root;
11040         struct btrfs_root *chunk_root = fs_info->chunk_root;
11041         struct btrfs_block_group_item *bi;
11042         struct btrfs_block_group_item bg_item;
11043         struct btrfs_path path;
11044         struct btrfs_key bg_key;
11045         struct btrfs_key chunk_key;
11046         struct btrfs_key extent_key;
11047         struct btrfs_chunk *chunk;
11048         struct extent_buffer *leaf;
11049         struct btrfs_extent_item *ei;
11050         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11051         u64 flags;
11052         u64 bg_flags;
11053         u64 used;
11054         u64 total = 0;
11055         int ret;
11056         int err = 0;
11057
11058         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11059         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11060         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11061         used = btrfs_block_group_used(&bg_item);
11062         bg_flags = btrfs_block_group_flags(&bg_item);
11063
11064         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11065         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11066         chunk_key.offset = bg_key.objectid;
11067
11068         btrfs_init_path(&path);
11069         /* Search for the referencer chunk */
11070         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11071         if (ret) {
11072                 error(
11073                 "block group[%llu %llu] did not find the related chunk item",
11074                         bg_key.objectid, bg_key.offset);
11075                 err |= REFERENCER_MISSING;
11076         } else {
11077                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11078                                         struct btrfs_chunk);
11079                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11080                                                 bg_key.offset) {
11081                         error(
11082         "block group[%llu %llu] related chunk item length does not match",
11083                                 bg_key.objectid, bg_key.offset);
11084                         err |= REFERENCER_MISMATCH;
11085                 }
11086         }
11087         btrfs_release_path(&path);
11088
11089         /* Search from the block group bytenr */
11090         extent_key.objectid = bg_key.objectid;
11091         extent_key.type = 0;
11092         extent_key.offset = 0;
11093
11094         btrfs_init_path(&path);
11095         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11096         if (ret < 0)
11097                 goto out;
11098
11099         /* Iterate extent tree to account used space */
11100         while (1) {
11101                 leaf = path.nodes[0];
11102
11103                 /* Search slot can point to the last item beyond leaf nritems */
11104                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11105                         goto next;
11106
11107                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11108                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11109                         break;
11110
11111                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11112                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11113                         goto next;
11114                 if (extent_key.objectid < bg_key.objectid)
11115                         goto next;
11116
11117                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11118                         total += nodesize;
11119                 else
11120                         total += extent_key.offset;
11121
11122                 ei = btrfs_item_ptr(leaf, path.slots[0],
11123                                     struct btrfs_extent_item);
11124                 flags = btrfs_extent_flags(leaf, ei);
11125                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11126                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11127                                 error(
11128                         "bad extent[%llu, %llu) type mismatch with chunk",
11129                                         extent_key.objectid,
11130                                         extent_key.objectid + extent_key.offset);
11131                                 err |= CHUNK_TYPE_MISMATCH;
11132                         }
11133                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11134                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11135                                     BTRFS_BLOCK_GROUP_METADATA))) {
11136                                 error(
11137                         "bad extent[%llu, %llu) type mismatch with chunk",
11138                                         extent_key.objectid,
11139                                         extent_key.objectid + nodesize);
11140                                 err |= CHUNK_TYPE_MISMATCH;
11141                         }
11142                 }
11143 next:
11144                 ret = btrfs_next_item(extent_root, &path);
11145                 if (ret)
11146                         break;
11147         }
11148
11149 out:
11150         btrfs_release_path(&path);
11151
11152         if (total != used) {
11153                 error(
11154                 "block group[%llu %llu] used %llu but extent items used %llu",
11155                         bg_key.objectid, bg_key.offset, used, total);
11156                 err |= ACCOUNTING_MISMATCH;
11157         }
11158         return err;
11159 }
11160
11161 /*
11162  * Check a chunk item.
11163  * Including checking all referred dev_extents and block group
11164  */
11165 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11166                             struct extent_buffer *eb, int slot)
11167 {
11168         struct btrfs_root *extent_root = fs_info->extent_root;
11169         struct btrfs_root *dev_root = fs_info->dev_root;
11170         struct btrfs_path path;
11171         struct btrfs_key chunk_key;
11172         struct btrfs_key bg_key;
11173         struct btrfs_key devext_key;
11174         struct btrfs_chunk *chunk;
11175         struct extent_buffer *leaf;
11176         struct btrfs_block_group_item *bi;
11177         struct btrfs_block_group_item bg_item;
11178         struct btrfs_dev_extent *ptr;
11179         u64 length;
11180         u64 chunk_end;
11181         u64 stripe_len;
11182         u64 type;
11183         int num_stripes;
11184         u64 offset;
11185         u64 objectid;
11186         int i;
11187         int ret;
11188         int err = 0;
11189
11190         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11191         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11192         length = btrfs_chunk_length(eb, chunk);
11193         chunk_end = chunk_key.offset + length;
11194         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11195                                       chunk_key.offset);
11196         if (ret < 0) {
11197                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11198                         chunk_end);
11199                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11200                 goto out;
11201         }
11202         type = btrfs_chunk_type(eb, chunk);
11203
11204         bg_key.objectid = chunk_key.offset;
11205         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11206         bg_key.offset = length;
11207
11208         btrfs_init_path(&path);
11209         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11210         if (ret) {
11211                 error(
11212                 "chunk[%llu %llu) did not find the related block group item",
11213                         chunk_key.offset, chunk_end);
11214                 err |= REFERENCER_MISSING;
11215         } else{
11216                 leaf = path.nodes[0];
11217                 bi = btrfs_item_ptr(leaf, path.slots[0],
11218                                     struct btrfs_block_group_item);
11219                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11220                                    sizeof(bg_item));
11221                 if (btrfs_block_group_flags(&bg_item) != type) {
11222                         error(
11223 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11224                                 chunk_key.offset, chunk_end, type,
11225                                 btrfs_block_group_flags(&bg_item));
11226                         err |= REFERENCER_MISSING;
11227                 }
11228         }
11229
11230         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11231         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11232         for (i = 0; i < num_stripes; i++) {
11233                 btrfs_release_path(&path);
11234                 btrfs_init_path(&path);
11235                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11236                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11237                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11238
11239                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11240                                         0, 0);
11241                 if (ret)
11242                         goto not_match_dev;
11243
11244                 leaf = path.nodes[0];
11245                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11246                                      struct btrfs_dev_extent);
11247                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11248                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11249                 if (objectid != chunk_key.objectid ||
11250                     offset != chunk_key.offset ||
11251                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11252                         goto not_match_dev;
11253                 continue;
11254 not_match_dev:
11255                 err |= BACKREF_MISSING;
11256                 error(
11257                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11258                         chunk_key.objectid, chunk_end, i);
11259                 continue;
11260         }
11261         btrfs_release_path(&path);
11262 out:
11263         return err;
11264 }
11265
11266 /*
11267  * Main entry function to check known items and update related accounting info
11268  */
11269 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11270 {
11271         struct btrfs_fs_info *fs_info = root->fs_info;
11272         struct btrfs_key key;
11273         int slot = 0;
11274         int type;
11275         struct btrfs_extent_data_ref *dref;
11276         int ret;
11277         int err = 0;
11278
11279 next:
11280         btrfs_item_key_to_cpu(eb, &key, slot);
11281         type = key.type;
11282
11283         switch (type) {
11284         case BTRFS_EXTENT_DATA_KEY:
11285                 ret = check_extent_data_item(root, eb, slot);
11286                 err |= ret;
11287                 break;
11288         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11289                 ret = check_block_group_item(fs_info, eb, slot);
11290                 err |= ret;
11291                 break;
11292         case BTRFS_DEV_ITEM_KEY:
11293                 ret = check_dev_item(fs_info, eb, slot);
11294                 err |= ret;
11295                 break;
11296         case BTRFS_CHUNK_ITEM_KEY:
11297                 ret = check_chunk_item(fs_info, eb, slot);
11298                 err |= ret;
11299                 break;
11300         case BTRFS_DEV_EXTENT_KEY:
11301                 ret = check_dev_extent_item(fs_info, eb, slot);
11302                 err |= ret;
11303                 break;
11304         case BTRFS_EXTENT_ITEM_KEY:
11305         case BTRFS_METADATA_ITEM_KEY:
11306                 ret = check_extent_item(fs_info, eb, slot);
11307                 err |= ret;
11308                 break;
11309         case BTRFS_EXTENT_CSUM_KEY:
11310                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11311                 break;
11312         case BTRFS_TREE_BLOCK_REF_KEY:
11313                 ret = check_tree_block_backref(fs_info, key.offset,
11314                                                key.objectid, -1);
11315                 err |= ret;
11316                 break;
11317         case BTRFS_EXTENT_DATA_REF_KEY:
11318                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11319                 ret = check_extent_data_backref(fs_info,
11320                                 btrfs_extent_data_ref_root(eb, dref),
11321                                 btrfs_extent_data_ref_objectid(eb, dref),
11322                                 btrfs_extent_data_ref_offset(eb, dref),
11323                                 key.objectid, 0,
11324                                 btrfs_extent_data_ref_count(eb, dref));
11325                 err |= ret;
11326                 break;
11327         case BTRFS_SHARED_BLOCK_REF_KEY:
11328                 ret = check_shared_block_backref(fs_info, key.offset,
11329                                                  key.objectid, -1);
11330                 err |= ret;
11331                 break;
11332         case BTRFS_SHARED_DATA_REF_KEY:
11333                 ret = check_shared_data_backref(fs_info, key.offset,
11334                                                 key.objectid);
11335                 err |= ret;
11336                 break;
11337         default:
11338                 break;
11339         }
11340
11341         if (++slot < btrfs_header_nritems(eb))
11342                 goto next;
11343
11344         return err;
11345 }
11346
11347 /*
11348  * Helper function for later fs/subvol tree check.  To determine if a tree
11349  * block should be checked.
11350  * This function will ensure only the direct referencer with lowest rootid to
11351  * check a fs/subvolume tree block.
11352  *
11353  * Backref check at extent tree would detect errors like missing subvolume
11354  * tree, so we can do aggressive check to reduce duplicated checks.
11355  */
11356 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11357 {
11358         struct btrfs_root *extent_root = root->fs_info->extent_root;
11359         struct btrfs_key key;
11360         struct btrfs_path path;
11361         struct extent_buffer *leaf;
11362         int slot;
11363         struct btrfs_extent_item *ei;
11364         unsigned long ptr;
11365         unsigned long end;
11366         int type;
11367         u32 item_size;
11368         u64 offset;
11369         struct btrfs_extent_inline_ref *iref;
11370         int ret;
11371
11372         btrfs_init_path(&path);
11373         key.objectid = btrfs_header_bytenr(eb);
11374         key.type = BTRFS_METADATA_ITEM_KEY;
11375         key.offset = (u64)-1;
11376
11377         /*
11378          * Any failure in backref resolving means we can't determine
11379          * whom the tree block belongs to.
11380          * So in that case, we need to check that tree block
11381          */
11382         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11383         if (ret < 0)
11384                 goto need_check;
11385
11386         ret = btrfs_previous_extent_item(extent_root, &path,
11387                                          btrfs_header_bytenr(eb));
11388         if (ret)
11389                 goto need_check;
11390
11391         leaf = path.nodes[0];
11392         slot = path.slots[0];
11393         btrfs_item_key_to_cpu(leaf, &key, slot);
11394         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11395
11396         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11397                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11398         } else {
11399                 struct btrfs_tree_block_info *info;
11400
11401                 info = (struct btrfs_tree_block_info *)(ei + 1);
11402                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11403         }
11404
11405         item_size = btrfs_item_size_nr(leaf, slot);
11406         ptr = (unsigned long)iref;
11407         end = (unsigned long)ei + item_size;
11408         while (ptr < end) {
11409                 iref = (struct btrfs_extent_inline_ref *)ptr;
11410                 type = btrfs_extent_inline_ref_type(leaf, iref);
11411                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11412
11413                 /*
11414                  * We only check the tree block if current root is
11415                  * the lowest referencer of it.
11416                  */
11417                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11418                     offset < root->objectid) {
11419                         btrfs_release_path(&path);
11420                         return 0;
11421                 }
11422
11423                 ptr += btrfs_extent_inline_ref_size(type);
11424         }
11425         /*
11426          * Normally we should also check keyed tree block ref, but that may be
11427          * very time consuming.  Inlined ref should already make us skip a lot
11428          * of refs now.  So skip search keyed tree block ref.
11429          */
11430
11431 need_check:
11432         btrfs_release_path(&path);
11433         return 1;
11434 }
11435
11436 /*
11437  * Traversal function for tree block. We will do:
11438  * 1) Skip shared fs/subvolume tree blocks
11439  * 2) Update related bytes accounting
11440  * 3) Pre-order traversal
11441  */
11442 static int traverse_tree_block(struct btrfs_root *root,
11443                                 struct extent_buffer *node)
11444 {
11445         struct extent_buffer *eb;
11446         struct btrfs_key key;
11447         struct btrfs_key drop_key;
11448         int level;
11449         u64 nr;
11450         int i;
11451         int err = 0;
11452         int ret;
11453
11454         /*
11455          * Skip shared fs/subvolume tree block, in that case they will
11456          * be checked by referencer with lowest rootid
11457          */
11458         if (is_fstree(root->objectid) && !should_check(root, node))
11459                 return 0;
11460
11461         /* Update bytes accounting */
11462         total_btree_bytes += node->len;
11463         if (fs_root_objectid(btrfs_header_owner(node)))
11464                 total_fs_tree_bytes += node->len;
11465         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11466                 total_extent_tree_bytes += node->len;
11467         if (!found_old_backref &&
11468             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11469             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11470             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11471                 found_old_backref = 1;
11472
11473         /* pre-order tranversal, check itself first */
11474         level = btrfs_header_level(node);
11475         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11476                                    btrfs_header_level(node),
11477                                    btrfs_header_owner(node));
11478         err |= ret;
11479         if (err)
11480                 error(
11481         "check %s failed root %llu bytenr %llu level %d, force continue check",
11482                         level ? "node":"leaf", root->objectid,
11483                         btrfs_header_bytenr(node), btrfs_header_level(node));
11484
11485         if (!level) {
11486                 btree_space_waste += btrfs_leaf_free_space(root, node);
11487                 ret = check_leaf_items(root, node);
11488                 err |= ret;
11489                 return err;
11490         }
11491
11492         nr = btrfs_header_nritems(node);
11493         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11494         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11495                 sizeof(struct btrfs_key_ptr);
11496
11497         /* Then check all its children */
11498         for (i = 0; i < nr; i++) {
11499                 u64 blocknr = btrfs_node_blockptr(node, i);
11500
11501                 btrfs_node_key_to_cpu(node, &key, i);
11502                 if (level == root->root_item.drop_level &&
11503                     is_dropped_key(&key, &drop_key))
11504                         continue;
11505
11506                 /*
11507                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11508                  * to call the function itself.
11509                  */
11510                 eb = read_tree_block(root->fs_info, blocknr, 0);
11511                 if (extent_buffer_uptodate(eb)) {
11512                         ret = traverse_tree_block(root, eb);
11513                         err |= ret;
11514                 }
11515                 free_extent_buffer(eb);
11516         }
11517
11518         return err;
11519 }
11520
11521 /*
11522  * Low memory usage version check_chunks_and_extents.
11523  */
11524 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11525 {
11526         struct btrfs_path path;
11527         struct btrfs_key key;
11528         struct btrfs_root *root1;
11529         struct btrfs_root *cur_root;
11530         int err = 0;
11531         int ret;
11532
11533         root1 = root->fs_info->chunk_root;
11534         ret = traverse_tree_block(root1, root1->node);
11535         err |= ret;
11536
11537         root1 = root->fs_info->tree_root;
11538         ret = traverse_tree_block(root1, root1->node);
11539         err |= ret;
11540
11541         btrfs_init_path(&path);
11542         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11543         key.offset = 0;
11544         key.type = BTRFS_ROOT_ITEM_KEY;
11545
11546         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11547         if (ret) {
11548                 error("cannot find extent treet in tree_root");
11549                 goto out;
11550         }
11551
11552         while (1) {
11553                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11554                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11555                         goto next;
11556                 key.offset = (u64)-1;
11557
11558                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11559                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11560                                         &key);
11561                 else
11562                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11563                 if (IS_ERR(cur_root) || !cur_root) {
11564                         error("failed to read tree: %lld", key.objectid);
11565                         goto next;
11566                 }
11567
11568                 ret = traverse_tree_block(cur_root, cur_root->node);
11569                 err |= ret;
11570
11571                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11572                         btrfs_free_fs_root(cur_root);
11573 next:
11574                 ret = btrfs_next_item(root1, &path);
11575                 if (ret)
11576                         goto out;
11577         }
11578
11579 out:
11580         btrfs_release_path(&path);
11581         return err;
11582 }
11583
11584 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11585                            struct btrfs_root *root, int overwrite)
11586 {
11587         struct extent_buffer *c;
11588         struct extent_buffer *old = root->node;
11589         int level;
11590         int ret;
11591         struct btrfs_disk_key disk_key = {0,0,0};
11592
11593         level = 0;
11594
11595         if (overwrite) {
11596                 c = old;
11597                 extent_buffer_get(c);
11598                 goto init;
11599         }
11600         c = btrfs_alloc_free_block(trans, root,
11601                                    root->fs_info->nodesize,
11602                                    root->root_key.objectid,
11603                                    &disk_key, level, 0, 0);
11604         if (IS_ERR(c)) {
11605                 c = old;
11606                 extent_buffer_get(c);
11607                 overwrite = 1;
11608         }
11609 init:
11610         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11611         btrfs_set_header_level(c, level);
11612         btrfs_set_header_bytenr(c, c->start);
11613         btrfs_set_header_generation(c, trans->transid);
11614         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11615         btrfs_set_header_owner(c, root->root_key.objectid);
11616
11617         write_extent_buffer(c, root->fs_info->fsid,
11618                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11619
11620         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11621                             btrfs_header_chunk_tree_uuid(c),
11622                             BTRFS_UUID_SIZE);
11623
11624         btrfs_mark_buffer_dirty(c);
11625         /*
11626          * this case can happen in the following case:
11627          *
11628          * 1.overwrite previous root.
11629          *
11630          * 2.reinit reloc data root, this is because we skip pin
11631          * down reloc data tree before which means we can allocate
11632          * same block bytenr here.
11633          */
11634         if (old->start == c->start) {
11635                 btrfs_set_root_generation(&root->root_item,
11636                                           trans->transid);
11637                 root->root_item.level = btrfs_header_level(root->node);
11638                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11639                                         &root->root_key, &root->root_item);
11640                 if (ret) {
11641                         free_extent_buffer(c);
11642                         return ret;
11643                 }
11644         }
11645         free_extent_buffer(old);
11646         root->node = c;
11647         add_root_to_dirty_list(root);
11648         return 0;
11649 }
11650
11651 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11652                                 struct extent_buffer *eb, int tree_root)
11653 {
11654         struct extent_buffer *tmp;
11655         struct btrfs_root_item *ri;
11656         struct btrfs_key key;
11657         u64 bytenr;
11658         int level = btrfs_header_level(eb);
11659         int nritems;
11660         int ret;
11661         int i;
11662
11663         /*
11664          * If we have pinned this block before, don't pin it again.
11665          * This can not only avoid forever loop with broken filesystem
11666          * but also give us some speedups.
11667          */
11668         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11669                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11670                 return 0;
11671
11672         btrfs_pin_extent(fs_info, eb->start, eb->len);
11673
11674         nritems = btrfs_header_nritems(eb);
11675         for (i = 0; i < nritems; i++) {
11676                 if (level == 0) {
11677                         btrfs_item_key_to_cpu(eb, &key, i);
11678                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11679                                 continue;
11680                         /* Skip the extent root and reloc roots */
11681                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11682                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11683                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11684                                 continue;
11685                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11686                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11687
11688                         /*
11689                          * If at any point we start needing the real root we
11690                          * will have to build a stump root for the root we are
11691                          * in, but for now this doesn't actually use the root so
11692                          * just pass in extent_root.
11693                          */
11694                         tmp = read_tree_block(fs_info, bytenr, 0);
11695                         if (!extent_buffer_uptodate(tmp)) {
11696                                 fprintf(stderr, "Error reading root block\n");
11697                                 return -EIO;
11698                         }
11699                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11700                         free_extent_buffer(tmp);
11701                         if (ret)
11702                                 return ret;
11703                 } else {
11704                         bytenr = btrfs_node_blockptr(eb, i);
11705
11706                         /* If we aren't the tree root don't read the block */
11707                         if (level == 1 && !tree_root) {
11708                                 btrfs_pin_extent(fs_info, bytenr,
11709                                                 fs_info->nodesize);
11710                                 continue;
11711                         }
11712
11713                         tmp = read_tree_block(fs_info, bytenr, 0);
11714                         if (!extent_buffer_uptodate(tmp)) {
11715                                 fprintf(stderr, "Error reading tree block\n");
11716                                 return -EIO;
11717                         }
11718                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11719                         free_extent_buffer(tmp);
11720                         if (ret)
11721                                 return ret;
11722                 }
11723         }
11724
11725         return 0;
11726 }
11727
11728 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11729 {
11730         int ret;
11731
11732         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11733         if (ret)
11734                 return ret;
11735
11736         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11737 }
11738
11739 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11740 {
11741         struct btrfs_block_group_cache *cache;
11742         struct btrfs_path path;
11743         struct extent_buffer *leaf;
11744         struct btrfs_chunk *chunk;
11745         struct btrfs_key key;
11746         int ret;
11747         u64 start;
11748
11749         btrfs_init_path(&path);
11750         key.objectid = 0;
11751         key.type = BTRFS_CHUNK_ITEM_KEY;
11752         key.offset = 0;
11753         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11754         if (ret < 0) {
11755                 btrfs_release_path(&path);
11756                 return ret;
11757         }
11758
11759         /*
11760          * We do this in case the block groups were screwed up and had alloc
11761          * bits that aren't actually set on the chunks.  This happens with
11762          * restored images every time and could happen in real life I guess.
11763          */
11764         fs_info->avail_data_alloc_bits = 0;
11765         fs_info->avail_metadata_alloc_bits = 0;
11766         fs_info->avail_system_alloc_bits = 0;
11767
11768         /* First we need to create the in-memory block groups */
11769         while (1) {
11770                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11771                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11772                         if (ret < 0) {
11773                                 btrfs_release_path(&path);
11774                                 return ret;
11775                         }
11776                         if (ret) {
11777                                 ret = 0;
11778                                 break;
11779                         }
11780                 }
11781                 leaf = path.nodes[0];
11782                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11783                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11784                         path.slots[0]++;
11785                         continue;
11786                 }
11787
11788                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11789                 btrfs_add_block_group(fs_info, 0,
11790                                       btrfs_chunk_type(leaf, chunk),
11791                                       key.objectid, key.offset,
11792                                       btrfs_chunk_length(leaf, chunk));
11793                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11794                                  key.offset + btrfs_chunk_length(leaf, chunk));
11795                 path.slots[0]++;
11796         }
11797         start = 0;
11798         while (1) {
11799                 cache = btrfs_lookup_first_block_group(fs_info, start);
11800                 if (!cache)
11801                         break;
11802                 cache->cached = 1;
11803                 start = cache->key.objectid + cache->key.offset;
11804         }
11805
11806         btrfs_release_path(&path);
11807         return 0;
11808 }
11809
11810 static int reset_balance(struct btrfs_trans_handle *trans,
11811                          struct btrfs_fs_info *fs_info)
11812 {
11813         struct btrfs_root *root = fs_info->tree_root;
11814         struct btrfs_path path;
11815         struct extent_buffer *leaf;
11816         struct btrfs_key key;
11817         int del_slot, del_nr = 0;
11818         int ret;
11819         int found = 0;
11820
11821         btrfs_init_path(&path);
11822         key.objectid = BTRFS_BALANCE_OBJECTID;
11823         key.type = BTRFS_BALANCE_ITEM_KEY;
11824         key.offset = 0;
11825         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11826         if (ret) {
11827                 if (ret > 0)
11828                         ret = 0;
11829                 if (!ret)
11830                         goto reinit_data_reloc;
11831                 else
11832                         goto out;
11833         }
11834
11835         ret = btrfs_del_item(trans, root, &path);
11836         if (ret)
11837                 goto out;
11838         btrfs_release_path(&path);
11839
11840         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11841         key.type = BTRFS_ROOT_ITEM_KEY;
11842         key.offset = 0;
11843         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11844         if (ret < 0)
11845                 goto out;
11846         while (1) {
11847                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11848                         if (!found)
11849                                 break;
11850
11851                         if (del_nr) {
11852                                 ret = btrfs_del_items(trans, root, &path,
11853                                                       del_slot, del_nr);
11854                                 del_nr = 0;
11855                                 if (ret)
11856                                         goto out;
11857                         }
11858                         key.offset++;
11859                         btrfs_release_path(&path);
11860
11861                         found = 0;
11862                         ret = btrfs_search_slot(trans, root, &key, &path,
11863                                                 -1, 1);
11864                         if (ret < 0)
11865                                 goto out;
11866                         continue;
11867                 }
11868                 found = 1;
11869                 leaf = path.nodes[0];
11870                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11871                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11872                         break;
11873                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11874                         path.slots[0]++;
11875                         continue;
11876                 }
11877                 if (!del_nr) {
11878                         del_slot = path.slots[0];
11879                         del_nr = 1;
11880                 } else {
11881                         del_nr++;
11882                 }
11883                 path.slots[0]++;
11884         }
11885
11886         if (del_nr) {
11887                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11888                 if (ret)
11889                         goto out;
11890         }
11891         btrfs_release_path(&path);
11892
11893 reinit_data_reloc:
11894         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11895         key.type = BTRFS_ROOT_ITEM_KEY;
11896         key.offset = (u64)-1;
11897         root = btrfs_read_fs_root(fs_info, &key);
11898         if (IS_ERR(root)) {
11899                 fprintf(stderr, "Error reading data reloc tree\n");
11900                 ret = PTR_ERR(root);
11901                 goto out;
11902         }
11903         record_root_in_trans(trans, root);
11904         ret = btrfs_fsck_reinit_root(trans, root, 0);
11905         if (ret)
11906                 goto out;
11907         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11908 out:
11909         btrfs_release_path(&path);
11910         return ret;
11911 }
11912
11913 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11914                               struct btrfs_fs_info *fs_info)
11915 {
11916         u64 start = 0;
11917         int ret;
11918
11919         /*
11920          * The only reason we don't do this is because right now we're just
11921          * walking the trees we find and pinning down their bytes, we don't look
11922          * at any of the leaves.  In order to do mixed groups we'd have to check
11923          * the leaves of any fs roots and pin down the bytes for any file
11924          * extents we find.  Not hard but why do it if we don't have to?
11925          */
11926         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11927                 fprintf(stderr, "We don't support re-initing the extent tree "
11928                         "for mixed block groups yet, please notify a btrfs "
11929                         "developer you want to do this so they can add this "
11930                         "functionality.\n");
11931                 return -EINVAL;
11932         }
11933
11934         /*
11935          * first we need to walk all of the trees except the extent tree and pin
11936          * down the bytes that are in use so we don't overwrite any existing
11937          * metadata.
11938          */
11939         ret = pin_metadata_blocks(fs_info);
11940         if (ret) {
11941                 fprintf(stderr, "error pinning down used bytes\n");
11942                 return ret;
11943         }
11944
11945         /*
11946          * Need to drop all the block groups since we're going to recreate all
11947          * of them again.
11948          */
11949         btrfs_free_block_groups(fs_info);
11950         ret = reset_block_groups(fs_info);
11951         if (ret) {
11952                 fprintf(stderr, "error resetting the block groups\n");
11953                 return ret;
11954         }
11955
11956         /* Ok we can allocate now, reinit the extent root */
11957         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11958         if (ret) {
11959                 fprintf(stderr, "extent root initialization failed\n");
11960                 /*
11961                  * When the transaction code is updated we should end the
11962                  * transaction, but for now progs only knows about commit so
11963                  * just return an error.
11964                  */
11965                 return ret;
11966         }
11967
11968         /*
11969          * Now we have all the in-memory block groups setup so we can make
11970          * allocations properly, and the metadata we care about is safe since we
11971          * pinned all of it above.
11972          */
11973         while (1) {
11974                 struct btrfs_block_group_cache *cache;
11975
11976                 cache = btrfs_lookup_first_block_group(fs_info, start);
11977                 if (!cache)
11978                         break;
11979                 start = cache->key.objectid + cache->key.offset;
11980                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11981                                         &cache->key, &cache->item,
11982                                         sizeof(cache->item));
11983                 if (ret) {
11984                         fprintf(stderr, "Error adding block group\n");
11985                         return ret;
11986                 }
11987                 btrfs_extent_post_op(trans, fs_info->extent_root);
11988         }
11989
11990         ret = reset_balance(trans, fs_info);
11991         if (ret)
11992                 fprintf(stderr, "error resetting the pending balance\n");
11993
11994         return ret;
11995 }
11996
11997 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11998 {
11999         struct btrfs_path path;
12000         struct btrfs_trans_handle *trans;
12001         struct btrfs_key key;
12002         int ret;
12003
12004         printf("Recowing metadata block %llu\n", eb->start);
12005         key.objectid = btrfs_header_owner(eb);
12006         key.type = BTRFS_ROOT_ITEM_KEY;
12007         key.offset = (u64)-1;
12008
12009         root = btrfs_read_fs_root(root->fs_info, &key);
12010         if (IS_ERR(root)) {
12011                 fprintf(stderr, "Couldn't find owner root %llu\n",
12012                         key.objectid);
12013                 return PTR_ERR(root);
12014         }
12015
12016         trans = btrfs_start_transaction(root, 1);
12017         if (IS_ERR(trans))
12018                 return PTR_ERR(trans);
12019
12020         btrfs_init_path(&path);
12021         path.lowest_level = btrfs_header_level(eb);
12022         if (path.lowest_level)
12023                 btrfs_node_key_to_cpu(eb, &key, 0);
12024         else
12025                 btrfs_item_key_to_cpu(eb, &key, 0);
12026
12027         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12028         btrfs_commit_transaction(trans, root);
12029         btrfs_release_path(&path);
12030         return ret;
12031 }
12032
12033 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12034 {
12035         struct btrfs_path path;
12036         struct btrfs_trans_handle *trans;
12037         struct btrfs_key key;
12038         int ret;
12039
12040         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12041                bad->key.type, bad->key.offset);
12042         key.objectid = bad->root_id;
12043         key.type = BTRFS_ROOT_ITEM_KEY;
12044         key.offset = (u64)-1;
12045
12046         root = btrfs_read_fs_root(root->fs_info, &key);
12047         if (IS_ERR(root)) {
12048                 fprintf(stderr, "Couldn't find owner root %llu\n",
12049                         key.objectid);
12050                 return PTR_ERR(root);
12051         }
12052
12053         trans = btrfs_start_transaction(root, 1);
12054         if (IS_ERR(trans))
12055                 return PTR_ERR(trans);
12056
12057         btrfs_init_path(&path);
12058         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12059         if (ret) {
12060                 if (ret > 0)
12061                         ret = 0;
12062                 goto out;
12063         }
12064         ret = btrfs_del_item(trans, root, &path);
12065 out:
12066         btrfs_commit_transaction(trans, root);
12067         btrfs_release_path(&path);
12068         return ret;
12069 }
12070
12071 static int zero_log_tree(struct btrfs_root *root)
12072 {
12073         struct btrfs_trans_handle *trans;
12074         int ret;
12075
12076         trans = btrfs_start_transaction(root, 1);
12077         if (IS_ERR(trans)) {
12078                 ret = PTR_ERR(trans);
12079                 return ret;
12080         }
12081         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12082         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12083         ret = btrfs_commit_transaction(trans, root);
12084         return ret;
12085 }
12086
12087 static int populate_csum(struct btrfs_trans_handle *trans,
12088                          struct btrfs_root *csum_root, char *buf, u64 start,
12089                          u64 len)
12090 {
12091         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12092         u64 offset = 0;
12093         u64 sectorsize;
12094         int ret = 0;
12095
12096         while (offset < len) {
12097                 sectorsize = fs_info->sectorsize;
12098                 ret = read_extent_data(fs_info, buf, start + offset,
12099                                        &sectorsize, 0);
12100                 if (ret)
12101                         break;
12102                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12103                                             start + offset, buf, sectorsize);
12104                 if (ret)
12105                         break;
12106                 offset += sectorsize;
12107         }
12108         return ret;
12109 }
12110
12111 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12112                                       struct btrfs_root *csum_root,
12113                                       struct btrfs_root *cur_root)
12114 {
12115         struct btrfs_path path;
12116         struct btrfs_key key;
12117         struct extent_buffer *node;
12118         struct btrfs_file_extent_item *fi;
12119         char *buf = NULL;
12120         u64 start = 0;
12121         u64 len = 0;
12122         int slot = 0;
12123         int ret = 0;
12124
12125         buf = malloc(cur_root->fs_info->sectorsize);
12126         if (!buf)
12127                 return -ENOMEM;
12128
12129         btrfs_init_path(&path);
12130         key.objectid = 0;
12131         key.offset = 0;
12132         key.type = 0;
12133         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12134         if (ret < 0)
12135                 goto out;
12136         /* Iterate all regular file extents and fill its csum */
12137         while (1) {
12138                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12139
12140                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12141                         goto next;
12142                 node = path.nodes[0];
12143                 slot = path.slots[0];
12144                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12145                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12146                         goto next;
12147                 start = btrfs_file_extent_disk_bytenr(node, fi);
12148                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12149
12150                 ret = populate_csum(trans, csum_root, buf, start, len);
12151                 if (ret == -EEXIST)
12152                         ret = 0;
12153                 if (ret < 0)
12154                         goto out;
12155 next:
12156                 /*
12157                  * TODO: if next leaf is corrupted, jump to nearest next valid
12158                  * leaf.
12159                  */
12160                 ret = btrfs_next_item(cur_root, &path);
12161                 if (ret < 0)
12162                         goto out;
12163                 if (ret > 0) {
12164                         ret = 0;
12165                         goto out;
12166                 }
12167         }
12168
12169 out:
12170         btrfs_release_path(&path);
12171         free(buf);
12172         return ret;
12173 }
12174
12175 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12176                                   struct btrfs_root *csum_root)
12177 {
12178         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12179         struct btrfs_path path;
12180         struct btrfs_root *tree_root = fs_info->tree_root;
12181         struct btrfs_root *cur_root;
12182         struct extent_buffer *node;
12183         struct btrfs_key key;
12184         int slot = 0;
12185         int ret = 0;
12186
12187         btrfs_init_path(&path);
12188         key.objectid = BTRFS_FS_TREE_OBJECTID;
12189         key.offset = 0;
12190         key.type = BTRFS_ROOT_ITEM_KEY;
12191         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12192         if (ret < 0)
12193                 goto out;
12194         if (ret > 0) {
12195                 ret = -ENOENT;
12196                 goto out;
12197         }
12198
12199         while (1) {
12200                 node = path.nodes[0];
12201                 slot = path.slots[0];
12202                 btrfs_item_key_to_cpu(node, &key, slot);
12203                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12204                         goto out;
12205                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12206                         goto next;
12207                 if (!is_fstree(key.objectid))
12208                         goto next;
12209                 key.offset = (u64)-1;
12210
12211                 cur_root = btrfs_read_fs_root(fs_info, &key);
12212                 if (IS_ERR(cur_root) || !cur_root) {
12213                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12214                                 key.objectid);
12215                         goto out;
12216                 }
12217                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12218                                 cur_root);
12219                 if (ret < 0)
12220                         goto out;
12221 next:
12222                 ret = btrfs_next_item(tree_root, &path);
12223                 if (ret > 0) {
12224                         ret = 0;
12225                         goto out;
12226                 }
12227                 if (ret < 0)
12228                         goto out;
12229         }
12230
12231 out:
12232         btrfs_release_path(&path);
12233         return ret;
12234 }
12235
12236 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12237                                       struct btrfs_root *csum_root)
12238 {
12239         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12240         struct btrfs_path path;
12241         struct btrfs_extent_item *ei;
12242         struct extent_buffer *leaf;
12243         char *buf;
12244         struct btrfs_key key;
12245         int ret;
12246
12247         btrfs_init_path(&path);
12248         key.objectid = 0;
12249         key.type = BTRFS_EXTENT_ITEM_KEY;
12250         key.offset = 0;
12251         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12252         if (ret < 0) {
12253                 btrfs_release_path(&path);
12254                 return ret;
12255         }
12256
12257         buf = malloc(csum_root->fs_info->sectorsize);
12258         if (!buf) {
12259                 btrfs_release_path(&path);
12260                 return -ENOMEM;
12261         }
12262
12263         while (1) {
12264                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12265                         ret = btrfs_next_leaf(extent_root, &path);
12266                         if (ret < 0)
12267                                 break;
12268                         if (ret) {
12269                                 ret = 0;
12270                                 break;
12271                         }
12272                 }
12273                 leaf = path.nodes[0];
12274
12275                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12276                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12277                         path.slots[0]++;
12278                         continue;
12279                 }
12280
12281                 ei = btrfs_item_ptr(leaf, path.slots[0],
12282                                     struct btrfs_extent_item);
12283                 if (!(btrfs_extent_flags(leaf, ei) &
12284                       BTRFS_EXTENT_FLAG_DATA)) {
12285                         path.slots[0]++;
12286                         continue;
12287                 }
12288
12289                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12290                                     key.offset);
12291                 if (ret)
12292                         break;
12293                 path.slots[0]++;
12294         }
12295
12296         btrfs_release_path(&path);
12297         free(buf);
12298         return ret;
12299 }
12300
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * When the extent tree has just been re-initialized it no longer describes
 * the data extents, so it cannot be used as the source.  In that case the
 * caller sets @search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12317
12318 static void free_roots_info_cache(void)
12319 {
12320         if (!roots_info_cache)
12321                 return;
12322
12323         while (!cache_tree_empty(roots_info_cache)) {
12324                 struct cache_extent *entry;
12325                 struct root_item_info *rii;
12326
12327                 entry = first_cache_extent(roots_info_cache);
12328                 if (!entry)
12329                         break;
12330                 remove_cache_extent(roots_info_cache, entry);
12331                 rii = container_of(entry, struct root_item_info, cache_extent);
12332                 free(rii);
12333         }
12334
12335         free(roots_info_cache);
12336         roots_info_cache = NULL;
12337 }
12338
12339 static int build_roots_info_cache(struct btrfs_fs_info *info)
12340 {
12341         int ret = 0;
12342         struct btrfs_key key;
12343         struct extent_buffer *leaf;
12344         struct btrfs_path path;
12345
12346         if (!roots_info_cache) {
12347                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12348                 if (!roots_info_cache)
12349                         return -ENOMEM;
12350                 cache_tree_init(roots_info_cache);
12351         }
12352
12353         btrfs_init_path(&path);
12354         key.objectid = 0;
12355         key.type = BTRFS_EXTENT_ITEM_KEY;
12356         key.offset = 0;
12357         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12358         if (ret < 0)
12359                 goto out;
12360         leaf = path.nodes[0];
12361
12362         while (1) {
12363                 struct btrfs_key found_key;
12364                 struct btrfs_extent_item *ei;
12365                 struct btrfs_extent_inline_ref *iref;
12366                 int slot = path.slots[0];
12367                 int type;
12368                 u64 flags;
12369                 u64 root_id;
12370                 u8 level;
12371                 struct cache_extent *entry;
12372                 struct root_item_info *rii;
12373
12374                 if (slot >= btrfs_header_nritems(leaf)) {
12375                         ret = btrfs_next_leaf(info->extent_root, &path);
12376                         if (ret < 0) {
12377                                 break;
12378                         } else if (ret) {
12379                                 ret = 0;
12380                                 break;
12381                         }
12382                         leaf = path.nodes[0];
12383                         slot = path.slots[0];
12384                 }
12385
12386                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12387
12388                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12389                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12390                         goto next;
12391
12392                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12393                 flags = btrfs_extent_flags(leaf, ei);
12394
12395                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12396                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12397                         goto next;
12398
12399                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12400                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12401                         level = found_key.offset;
12402                 } else {
12403                         struct btrfs_tree_block_info *binfo;
12404
12405                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12406                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12407                         level = btrfs_tree_block_level(leaf, binfo);
12408                 }
12409
12410                 /*
12411                  * For a root extent, it must be of the following type and the
12412                  * first (and only one) iref in the item.
12413                  */
12414                 type = btrfs_extent_inline_ref_type(leaf, iref);
12415                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12416                         goto next;
12417
12418                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12419                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12420                 if (!entry) {
12421                         rii = malloc(sizeof(struct root_item_info));
12422                         if (!rii) {
12423                                 ret = -ENOMEM;
12424                                 goto out;
12425                         }
12426                         rii->cache_extent.start = root_id;
12427                         rii->cache_extent.size = 1;
12428                         rii->level = (u8)-1;
12429                         entry = &rii->cache_extent;
12430                         ret = insert_cache_extent(roots_info_cache, entry);
12431                         ASSERT(ret == 0);
12432                 } else {
12433                         rii = container_of(entry, struct root_item_info,
12434                                            cache_extent);
12435                 }
12436
12437                 ASSERT(rii->cache_extent.start == root_id);
12438                 ASSERT(rii->cache_extent.size == 1);
12439
12440                 if (level > rii->level || rii->level == (u8)-1) {
12441                         rii->level = level;
12442                         rii->bytenr = found_key.objectid;
12443                         rii->gen = btrfs_extent_generation(leaf, ei);
12444                         rii->node_count = 1;
12445                 } else if (level == rii->level) {
12446                         rii->node_count++;
12447                 }
12448 next:
12449                 path.slots[0]++;
12450         }
12451
12452 out:
12453         btrfs_release_path(&path);
12454
12455         return ret;
12456 }
12457
12458 static int maybe_repair_root_item(struct btrfs_path *path,
12459                                   const struct btrfs_key *root_key,
12460                                   const int read_only_mode)
12461 {
12462         const u64 root_id = root_key->objectid;
12463         struct cache_extent *entry;
12464         struct root_item_info *rii;
12465         struct btrfs_root_item ri;
12466         unsigned long offset;
12467
12468         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12469         if (!entry) {
12470                 fprintf(stderr,
12471                         "Error: could not find extent items for root %llu\n",
12472                         root_key->objectid);
12473                 return -ENOENT;
12474         }
12475
12476         rii = container_of(entry, struct root_item_info, cache_extent);
12477         ASSERT(rii->cache_extent.start == root_id);
12478         ASSERT(rii->cache_extent.size == 1);
12479
12480         if (rii->node_count != 1) {
12481                 fprintf(stderr,
12482                         "Error: could not find btree root extent for root %llu\n",
12483                         root_id);
12484                 return -ENOENT;
12485         }
12486
12487         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12488         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12489
12490         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12491             btrfs_root_level(&ri) != rii->level ||
12492             btrfs_root_generation(&ri) != rii->gen) {
12493
12494                 /*
12495                  * If we're in repair mode but our caller told us to not update
12496                  * the root item, i.e. just check if it needs to be updated, don't
12497                  * print this message, since the caller will call us again shortly
12498                  * for the same root item without read only mode (the caller will
12499                  * open a transaction first).
12500                  */
12501                 if (!(read_only_mode && repair))
12502                         fprintf(stderr,
12503                                 "%sroot item for root %llu,"
12504                                 " current bytenr %llu, current gen %llu, current level %u,"
12505                                 " new bytenr %llu, new gen %llu, new level %u\n",
12506                                 (read_only_mode ? "" : "fixing "),
12507                                 root_id,
12508                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12509                                 btrfs_root_level(&ri),
12510                                 rii->bytenr, rii->gen, rii->level);
12511
12512                 if (btrfs_root_generation(&ri) > rii->gen) {
12513                         fprintf(stderr,
12514                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12515                                 root_id, btrfs_root_generation(&ri), rii->gen);
12516                         return -EINVAL;
12517                 }
12518
12519                 if (!read_only_mode) {
12520                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12521                         btrfs_set_root_level(&ri, rii->level);
12522                         btrfs_set_root_generation(&ri, rii->gen);
12523                         write_extent_buffer(path->nodes[0], &ri,
12524                                             offset, sizeof(ri));
12525                 }
12526
12527                 return 1;
12528         }
12529
12530         return 0;
12531 }
12532
12533 /*
12534  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12535  * caused read-only snapshots to be corrupted if they were created at a moment
12536  * when the source subvolume/snapshot had orphan items. The issue was that the
12537  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12538  * node instead of the post orphan cleanup root node.
12539  * So this function, and its callees, just detects and fixes those cases. Even
12540  * though the regression was for read-only snapshots, this function applies to
12541  * any snapshot/subvolume root.
12542  * This must be run before any other repair code - not doing it so, makes other
12543  * repair code delete or modify backrefs in the extent tree for example, which
12544  * will result in an inconsistent fs after repairing the root items.
12545  */
12546 static int repair_root_items(struct btrfs_fs_info *info)
12547 {
12548         struct btrfs_path path;
12549         struct btrfs_key key;
12550         struct extent_buffer *leaf;
12551         struct btrfs_trans_handle *trans = NULL;
12552         int ret = 0;
12553         int bad_roots = 0;
12554         int need_trans = 0;
12555
12556         btrfs_init_path(&path);
12557
12558         ret = build_roots_info_cache(info);
12559         if (ret)
12560                 goto out;
12561
12562         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12563         key.type = BTRFS_ROOT_ITEM_KEY;
12564         key.offset = 0;
12565
12566 again:
12567         /*
12568          * Avoid opening and committing transactions if a leaf doesn't have
12569          * any root items that need to be fixed, so that we avoid rotating
12570          * backup roots unnecessarily.
12571          */
12572         if (need_trans) {
12573                 trans = btrfs_start_transaction(info->tree_root, 1);
12574                 if (IS_ERR(trans)) {
12575                         ret = PTR_ERR(trans);
12576                         goto out;
12577                 }
12578         }
12579
12580         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12581                                 0, trans ? 1 : 0);
12582         if (ret < 0)
12583                 goto out;
12584         leaf = path.nodes[0];
12585
12586         while (1) {
12587                 struct btrfs_key found_key;
12588
12589                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12590                         int no_more_keys = find_next_key(&path, &key);
12591
12592                         btrfs_release_path(&path);
12593                         if (trans) {
12594                                 ret = btrfs_commit_transaction(trans,
12595                                                                info->tree_root);
12596                                 trans = NULL;
12597                                 if (ret < 0)
12598                                         goto out;
12599                         }
12600                         need_trans = 0;
12601                         if (no_more_keys)
12602                                 break;
12603                         goto again;
12604                 }
12605
12606                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12607
12608                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12609                         goto next;
12610                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12611                         goto next;
12612
12613                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12614                 if (ret < 0)
12615                         goto out;
12616                 if (ret) {
12617                         if (!trans && repair) {
12618                                 need_trans = 1;
12619                                 key = found_key;
12620                                 btrfs_release_path(&path);
12621                                 goto again;
12622                         }
12623                         bad_roots++;
12624                 }
12625 next:
12626                 path.slots[0]++;
12627         }
12628         ret = 0;
12629 out:
12630         free_roots_info_cache();
12631         btrfs_release_path(&path);
12632         if (trans)
12633                 btrfs_commit_transaction(trans, info->tree_root);
12634         if (ret < 0)
12635                 return ret;
12636
12637         return bad_roots;
12638 }
12639
12640 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12641 {
12642         struct btrfs_trans_handle *trans;
12643         struct btrfs_block_group_cache *bg_cache;
12644         u64 current = 0;
12645         int ret = 0;
12646
12647         /* Clear all free space cache inodes and its extent data */
12648         while (1) {
12649                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12650                 if (!bg_cache)
12651                         break;
12652                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12653                 if (ret < 0)
12654                         return ret;
12655                 current = bg_cache->key.objectid + bg_cache->key.offset;
12656         }
12657
12658         /* Don't forget to set cache_generation to -1 */
12659         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12660         if (IS_ERR(trans)) {
12661                 error("failed to update super block cache generation");
12662                 return PTR_ERR(trans);
12663         }
12664         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12665         btrfs_commit_transaction(trans, fs_info->tree_root);
12666
12667         return ret;
12668 }
12669
/*
 * Help text for "btrfs check": one string per output line, terminated by
 * NULL.  The first entry is the synopsis and the second a one-line summary;
 * the entries before the empty string are the long description and the ones
 * after it describe the options.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and short description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	NULL
};
12700
12701 int cmd_check(int argc, char **argv)
12702 {
12703         struct cache_tree root_cache;
12704         struct btrfs_root *root;
12705         struct btrfs_fs_info *info;
12706         u64 bytenr = 0;
12707         u64 subvolid = 0;
12708         u64 tree_root_bytenr = 0;
12709         u64 chunk_root_bytenr = 0;
12710         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12711         int ret;
12712         int err = 0;
12713         u64 num;
12714         int init_csum_tree = 0;
12715         int readonly = 0;
12716         int clear_space_cache = 0;
12717         int qgroup_report = 0;
12718         int qgroups_repaired = 0;
12719         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12720
12721         while(1) {
12722                 int c;
12723                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12724                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12725                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12726                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12727                 static const struct option long_options[] = {
12728                         { "super", required_argument, NULL, 's' },
12729                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12730                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12731                         { "init-csum-tree", no_argument, NULL,
12732                                 GETOPT_VAL_INIT_CSUM },
12733                         { "init-extent-tree", no_argument, NULL,
12734                                 GETOPT_VAL_INIT_EXTENT },
12735                         { "check-data-csum", no_argument, NULL,
12736                                 GETOPT_VAL_CHECK_CSUM },
12737                         { "backup", no_argument, NULL, 'b' },
12738                         { "subvol-extents", required_argument, NULL, 'E' },
12739                         { "qgroup-report", no_argument, NULL, 'Q' },
12740                         { "tree-root", required_argument, NULL, 'r' },
12741                         { "chunk-root", required_argument, NULL,
12742                                 GETOPT_VAL_CHUNK_TREE },
12743                         { "progress", no_argument, NULL, 'p' },
12744                         { "mode", required_argument, NULL,
12745                                 GETOPT_VAL_MODE },
12746                         { "clear-space-cache", required_argument, NULL,
12747                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12748                         { NULL, 0, NULL, 0}
12749                 };
12750
12751                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12752                 if (c < 0)
12753                         break;
12754                 switch(c) {
12755                         case 'a': /* ignored */ break;
12756                         case 'b':
12757                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12758                                 break;
12759                         case 's':
12760                                 num = arg_strtou64(optarg);
12761                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12762                                         error(
12763                                         "super mirror should be less than %d",
12764                                                 BTRFS_SUPER_MIRROR_MAX);
12765                                         exit(1);
12766                                 }
12767                                 bytenr = btrfs_sb_offset(((int)num));
12768                                 printf("using SB copy %llu, bytenr %llu\n", num,
12769                                        (unsigned long long)bytenr);
12770                                 break;
12771                         case 'Q':
12772                                 qgroup_report = 1;
12773                                 break;
12774                         case 'E':
12775                                 subvolid = arg_strtou64(optarg);
12776                                 break;
12777                         case 'r':
12778                                 tree_root_bytenr = arg_strtou64(optarg);
12779                                 break;
12780                         case GETOPT_VAL_CHUNK_TREE:
12781                                 chunk_root_bytenr = arg_strtou64(optarg);
12782                                 break;
12783                         case 'p':
12784                                 ctx.progress_enabled = true;
12785                                 break;
12786                         case '?':
12787                         case 'h':
12788                                 usage(cmd_check_usage);
12789                         case GETOPT_VAL_REPAIR:
12790                                 printf("enabling repair mode\n");
12791                                 repair = 1;
12792                                 ctree_flags |= OPEN_CTREE_WRITES;
12793                                 break;
12794                         case GETOPT_VAL_READONLY:
12795                                 readonly = 1;
12796                                 break;
12797                         case GETOPT_VAL_INIT_CSUM:
12798                                 printf("Creating a new CRC tree\n");
12799                                 init_csum_tree = 1;
12800                                 repair = 1;
12801                                 ctree_flags |= OPEN_CTREE_WRITES;
12802                                 break;
12803                         case GETOPT_VAL_INIT_EXTENT:
12804                                 init_extent_tree = 1;
12805                                 ctree_flags |= (OPEN_CTREE_WRITES |
12806                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12807                                 repair = 1;
12808                                 break;
12809                         case GETOPT_VAL_CHECK_CSUM:
12810                                 check_data_csum = 1;
12811                                 break;
12812                         case GETOPT_VAL_MODE:
12813                                 check_mode = parse_check_mode(optarg);
12814                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12815                                         error("unknown mode: %s", optarg);
12816                                         exit(1);
12817                                 }
12818                                 break;
12819                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12820                                 if (strcmp(optarg, "v1") == 0) {
12821                                         clear_space_cache = 1;
12822                                 } else if (strcmp(optarg, "v2") == 0) {
12823                                         clear_space_cache = 2;
12824                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12825                                 } else {
12826                                         error(
12827                 "invalid argument to --clear-space-cache, must be v1 or v2");
12828                                         exit(1);
12829                                 }
12830                                 ctree_flags |= OPEN_CTREE_WRITES;
12831                                 break;
12832                 }
12833         }
12834
12835         if (check_argc_exact(argc - optind, 1))
12836                 usage(cmd_check_usage);
12837
12838         if (ctx.progress_enabled) {
12839                 ctx.tp = TASK_NOTHING;
12840                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12841         }
12842
12843         /* This check is the only reason for --readonly to exist */
12844         if (readonly && repair) {
12845                 error("repair options are not compatible with --readonly");
12846                 exit(1);
12847         }
12848
12849         /*
12850          * Not supported yet
12851          */
12852         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12853                 error("low memory mode doesn't support repair yet");
12854                 exit(1);
12855         }
12856
12857         radix_tree_init();
12858         cache_tree_init(&root_cache);
12859
12860         if((ret = check_mounted(argv[optind])) < 0) {
12861                 error("could not check mount status: %s", strerror(-ret));
12862                 err |= !!ret;
12863                 goto err_out;
12864         } else if(ret) {
12865                 error("%s is currently mounted, aborting", argv[optind]);
12866                 ret = -EBUSY;
12867                 err |= !!ret;
12868                 goto err_out;
12869         }
12870
12871         /* only allow partial opening under repair mode */
12872         if (repair)
12873                 ctree_flags |= OPEN_CTREE_PARTIAL;
12874
12875         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12876                                   chunk_root_bytenr, ctree_flags);
12877         if (!info) {
12878                 error("cannot open file system");
12879                 ret = -EIO;
12880                 err |= !!ret;
12881                 goto err_out;
12882         }
12883
12884         global_info = info;
12885         root = info->fs_root;
12886         if (clear_space_cache == 1) {
12887                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12888                         error(
12889                 "free space cache v2 detected, use --clear-space-cache v2");
12890                         ret = 1;
12891                         goto close_out;
12892                 }
12893                 printf("Clearing free space cache\n");
12894                 ret = clear_free_space_cache(info);
12895                 if (ret) {
12896                         error("failed to clear free space cache");
12897                         ret = 1;
12898                 } else {
12899                         printf("Free space cache cleared\n");
12900                 }
12901                 goto close_out;
12902         } else if (clear_space_cache == 2) {
12903                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12904                         printf("no free space cache v2 to clear\n");
12905                         ret = 0;
12906                         goto close_out;
12907                 }
12908                 printf("Clear free space cache v2\n");
12909                 ret = btrfs_clear_free_space_tree(info);
12910                 if (ret) {
12911                         error("failed to clear free space cache v2: %d", ret);
12912                         ret = 1;
12913                 } else {
12914                         printf("free space cache v2 cleared\n");
12915                 }
12916                 goto close_out;
12917         }
12918
12919         /*
12920          * repair mode will force us to commit transaction which
12921          * will make us fail to load log tree when mounting.
12922          */
12923         if (repair && btrfs_super_log_root(info->super_copy)) {
12924                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12925                 if (!ret) {
12926                         ret = 1;
12927                         err |= !!ret;
12928                         goto close_out;
12929                 }
12930                 ret = zero_log_tree(root);
12931                 err |= !!ret;
12932                 if (ret) {
12933                         error("failed to zero log tree: %d", ret);
12934                         goto close_out;
12935                 }
12936         }
12937
12938         uuid_unparse(info->super_copy->fsid, uuidbuf);
12939         if (qgroup_report) {
12940                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12941                        uuidbuf);
12942                 ret = qgroup_verify_all(info);
12943                 err |= !!ret;
12944                 if (ret == 0)
12945                         report_qgroups(1);
12946                 goto close_out;
12947         }
12948         if (subvolid) {
12949                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12950                        subvolid, argv[optind], uuidbuf);
12951                 ret = print_extent_state(info, subvolid);
12952                 err |= !!ret;
12953                 goto close_out;
12954         }
12955         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12956
12957         if (!extent_buffer_uptodate(info->tree_root->node) ||
12958             !extent_buffer_uptodate(info->dev_root->node) ||
12959             !extent_buffer_uptodate(info->chunk_root->node)) {
12960                 error("critical roots corrupted, unable to check the filesystem");
12961                 err |= !!ret;
12962                 ret = -EIO;
12963                 goto close_out;
12964         }
12965
12966         if (init_extent_tree || init_csum_tree) {
12967                 struct btrfs_trans_handle *trans;
12968
12969                 trans = btrfs_start_transaction(info->extent_root, 0);
12970                 if (IS_ERR(trans)) {
12971                         error("error starting transaction");
12972                         ret = PTR_ERR(trans);
12973                         err |= !!ret;
12974                         goto close_out;
12975                 }
12976
12977                 if (init_extent_tree) {
12978                         printf("Creating a new extent tree\n");
12979                         ret = reinit_extent_tree(trans, info);
12980                         err |= !!ret;
12981                         if (ret)
12982                                 goto close_out;
12983                 }
12984
12985                 if (init_csum_tree) {
12986                         printf("Reinitialize checksum tree\n");
12987                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12988                         if (ret) {
12989                                 error("checksum tree initialization failed: %d",
12990                                                 ret);
12991                                 ret = -EIO;
12992                                 err |= !!ret;
12993                                 goto close_out;
12994                         }
12995
12996                         ret = fill_csum_tree(trans, info->csum_root,
12997                                              init_extent_tree);
12998                         err |= !!ret;
12999                         if (ret) {
13000                                 error("checksum tree refilling failed: %d", ret);
13001                                 return -EIO;
13002                         }
13003                 }
13004                 /*
13005                  * Ok now we commit and run the normal fsck, which will add
13006                  * extent entries for all of the items it finds.
13007                  */
13008                 ret = btrfs_commit_transaction(trans, info->extent_root);
13009                 err |= !!ret;
13010                 if (ret)
13011                         goto close_out;
13012         }
13013         if (!extent_buffer_uptodate(info->extent_root->node)) {
13014                 error("critical: extent_root, unable to check the filesystem");
13015                 ret = -EIO;
13016                 err |= !!ret;
13017                 goto close_out;
13018         }
13019         if (!extent_buffer_uptodate(info->csum_root->node)) {
13020                 error("critical: csum_root, unable to check the filesystem");
13021                 ret = -EIO;
13022                 err |= !!ret;
13023                 goto close_out;
13024         }
13025
13026         if (!ctx.progress_enabled)
13027                 fprintf(stderr, "checking extents\n");
13028         if (check_mode == CHECK_MODE_LOWMEM)
13029                 ret = check_chunks_and_extents_v2(root);
13030         else
13031                 ret = check_chunks_and_extents(root);
13032         err |= !!ret;
13033         if (ret)
13034                 error(
13035                 "errors found in extent allocation tree or chunk allocation");
13036
13037         ret = repair_root_items(info);
13038         err |= !!ret;
13039         if (ret < 0) {
13040                 error("failed to repair root items: %s", strerror(-ret));
13041                 goto close_out;
13042         }
13043         if (repair) {
13044                 fprintf(stderr, "Fixed %d roots.\n", ret);
13045                 ret = 0;
13046         } else if (ret > 0) {
13047                 fprintf(stderr,
13048                        "Found %d roots with an outdated root item.\n",
13049                        ret);
13050                 fprintf(stderr,
13051                         "Please run a filesystem check with the option --repair to fix them.\n");
13052                 ret = 1;
13053                 err |= !!ret;
13054                 goto close_out;
13055         }
13056
13057         if (!ctx.progress_enabled) {
13058                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13059                         fprintf(stderr, "checking free space tree\n");
13060                 else
13061                         fprintf(stderr, "checking free space cache\n");
13062         }
13063         ret = check_space_cache(root);
13064         err |= !!ret;
13065         if (ret) {
13066                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13067                         error("errors found in free space tree");
13068                 else
13069                         error("errors found in free space cache");
13070                 goto out;
13071         }
13072
13073         /*
13074          * We used to have to have these hole extents in between our real
13075          * extents so if we don't have this flag set we need to make sure there
13076          * are no gaps in the file extents for inodes, otherwise we can just
13077          * ignore it when this happens.
13078          */
13079         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13080         if (!ctx.progress_enabled)
13081                 fprintf(stderr, "checking fs roots\n");
13082         if (check_mode == CHECK_MODE_LOWMEM)
13083                 ret = check_fs_roots_v2(root->fs_info);
13084         else
13085                 ret = check_fs_roots(root, &root_cache);
13086         err |= !!ret;
13087         if (ret) {
13088                 error("errors found in fs roots");
13089                 goto out;
13090         }
13091
13092         fprintf(stderr, "checking csums\n");
13093         ret = check_csums(root);
13094         err |= !!ret;
13095         if (ret) {
13096                 error("errors found in csum tree");
13097                 goto out;
13098         }
13099
13100         fprintf(stderr, "checking root refs\n");
13101         /* For low memory mode, check_fs_roots_v2 handles root refs */
13102         if (check_mode != CHECK_MODE_LOWMEM) {
13103                 ret = check_root_refs(root, &root_cache);
13104                 err |= !!ret;
13105                 if (ret) {
13106                         error("errors found in root refs");
13107                         goto out;
13108                 }
13109         }
13110
13111         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13112                 struct extent_buffer *eb;
13113
13114                 eb = list_first_entry(&root->fs_info->recow_ebs,
13115                                       struct extent_buffer, recow);
13116                 list_del_init(&eb->recow);
13117                 ret = recow_extent_buffer(root, eb);
13118                 err |= !!ret;
13119                 if (ret) {
13120                         error("fails to fix transid errors");
13121                         break;
13122                 }
13123         }
13124
13125         while (!list_empty(&delete_items)) {
13126                 struct bad_item *bad;
13127
13128                 bad = list_first_entry(&delete_items, struct bad_item, list);
13129                 list_del_init(&bad->list);
13130                 if (repair) {
13131                         ret = delete_bad_item(root, bad);
13132                         err |= !!ret;
13133                 }
13134                 free(bad);
13135         }
13136
13137         if (info->quota_enabled) {
13138                 fprintf(stderr, "checking quota groups\n");
13139                 ret = qgroup_verify_all(info);
13140                 err |= !!ret;
13141                 if (ret) {
13142                         error("failed to check quota groups");
13143                         goto out;
13144                 }
13145                 report_qgroups(0);
13146                 ret = repair_qgroups(info, &qgroups_repaired);
13147                 err |= !!ret;
13148                 if (err) {
13149                         error("failed to repair quota groups");
13150                         goto out;
13151                 }
13152                 ret = 0;
13153         }
13154
13155         if (!list_empty(&root->fs_info->recow_ebs)) {
13156                 error("transid errors in file system");
13157                 ret = 1;
13158                 err |= !!ret;
13159         }
13160 out:
13161         if (found_old_backref) { /*
13162                  * there was a disk format change when mixed
13163                  * backref was in testing tree. The old format
13164                  * existed about one week.
13165                  */
13166                 printf("\n * Found old mixed backref format. "
13167                        "The old format is not supported! *"
13168                        "\n * Please mount the FS in readonly mode, "
13169                        "backup data and re-format the FS. *\n\n");
13170                 err |= 1;
13171         }
13172         printf("found %llu bytes used, ",
13173                (unsigned long long)bytes_used);
13174         if (err)
13175                 printf("error(s) found\n");
13176         else
13177                 printf("no error found\n");
13178         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13179         printf("total tree bytes: %llu\n",
13180                (unsigned long long)total_btree_bytes);
13181         printf("total fs tree bytes: %llu\n",
13182                (unsigned long long)total_fs_tree_bytes);
13183         printf("total extent tree bytes: %llu\n",
13184                (unsigned long long)total_extent_tree_bytes);
13185         printf("btree space waste bytes: %llu\n",
13186                (unsigned long long)btree_space_waste);
13187         printf("file data blocks allocated: %llu\n referenced %llu\n",
13188                 (unsigned long long)data_bytes_allocated,
13189                 (unsigned long long)data_bytes_referenced);
13190
13191         free_qgroup_counts();
13192         free_root_recs_tree(&root_cache);
13193 close_out:
13194         close_ctree(root);
13195 err_out:
13196         if (ctx.progress_enabled)
13197                 task_deinit(ctx.info);
13198
13199         return err;
13200 }