btrfs-progs: drop blocksize argument from readahead_tree_block
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check run that the progress indicator reports.
 * The first three values are used as indexes into the message table in
 * print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel, has to stay the last element. */
	TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Check implementations that can be selected; see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized name. */
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
88 struct extent_backref {
89         struct list_head list;
90         unsigned int is_data:1;
91         unsigned int found_extent_tree:1;
92         unsigned int full_backref:1;
93         unsigned int found_ref:1;
94         unsigned int broken:1;
95 };
96
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 {
99         return list_entry(entry, struct extent_backref, list);
100 }
101
102 struct data_backref {
103         struct extent_backref node;
104         union {
105                 u64 parent;
106                 u64 root;
107         };
108         u64 owner;
109         u64 offset;
110         u64 disk_bytenr;
111         u64 bytes;
112         u64 ram_bytes;
113         u32 num_refs;
114         u32 found_ref;
115 };
116
/* Error bits; each macro is a distinct single bit, starting at bit 1. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
134
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct data_backref, node);
138 }
139
140 /*
141  * Much like data_backref, just removed the undetermined members
142  * and change it to use list_head.
143  * During extent scan, it is stored in root->orphan_data_extent.
144  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145  */
146 struct orphan_data_extent {
147         struct list_head list;
148         u64 root;
149         u64 objectid;
150         u64 offset;
151         u64 disk_bytenr;
152         u64 disk_len;
153 };
154
155 struct tree_backref {
156         struct extent_backref node;
157         union {
158                 u64 parent;
159                 u64 root;
160         };
161 };
162
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 {
165         return container_of(back, struct tree_backref, node);
166 }
167
/*
 * Explicit "not set" value for extent_record::flag_block_full_backref.
 * That member is a two-bit field, so the value 2 fits in it.
 */
enum {
	FLAG_UNSET = 2
};
170
171 struct extent_record {
172         struct list_head backrefs;
173         struct list_head dups;
174         struct list_head list;
175         struct cache_extent cache;
176         struct btrfs_disk_key parent_key;
177         u64 start;
178         u64 max_size;
179         u64 nr;
180         u64 refs;
181         u64 extent_item_refs;
182         u64 generation;
183         u64 parent_generation;
184         u64 info_objectid;
185         u32 num_duplicates;
186         u8 info_level;
187         unsigned int flag_block_full_backref:2;
188         unsigned int found_rec:1;
189         unsigned int content_checked:1;
190         unsigned int owner_ref_checked:1;
191         unsigned int is_root:1;
192         unsigned int metadata:1;
193         unsigned int bad_full_backref:1;
194         unsigned int crossing_stripes:1;
195         unsigned int wrong_chunk_type:1;
196 };
197
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 {
200         return container_of(entry, struct extent_record, list);
201 }
202
203 struct inode_backref {
204         struct list_head list;
205         unsigned int found_dir_item:1;
206         unsigned int found_dir_index:1;
207         unsigned int found_inode_ref:1;
208         u8 filetype;
209         u8 ref_type;
210         int errors;
211         u64 dir;
212         u64 index;
213         u16 namelen;
214         char name[0];
215 };
216
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 {
219         return list_entry(entry, struct inode_backref, list);
220 }
221
222 struct root_item_record {
223         struct list_head list;
224         u64 objectid;
225         u64 bytenr;
226         u64 last_snapshot;
227         u8 level;
228         u8 drop_level;
229         struct btrfs_key drop_key;
230 };
231
/* Backref error bits; print_ref_error() turns them into text. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
245
246 struct file_extent_hole {
247         struct rb_node node;
248         u64 start;
249         u64 len;
250 };
251
252 struct inode_record {
253         struct list_head backrefs;
254         unsigned int checked:1;
255         unsigned int merging:1;
256         unsigned int found_inode_item:1;
257         unsigned int found_dir_item:1;
258         unsigned int found_file_extent:1;
259         unsigned int found_csum_item:1;
260         unsigned int some_csum_missing:1;
261         unsigned int nodatasum:1;
262         int errors;
263
264         u64 ino;
265         u32 nlink;
266         u32 imode;
267         u64 isize;
268         u64 nbytes;
269
270         u32 found_link;
271         u64 found_size;
272         u64 extent_start;
273         u64 extent_end;
274         struct rb_root holes;
275         struct list_head orphan_extents;
276
277         u32 refs;
278 };
279
/* Inode error bits; print_inode_error() turns them into text. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
295
296 struct root_backref {
297         struct list_head list;
298         unsigned int found_dir_item:1;
299         unsigned int found_dir_index:1;
300         unsigned int found_back_ref:1;
301         unsigned int found_forward_ref:1;
302         unsigned int reachable:1;
303         int errors;
304         u64 ref_root;
305         u64 dir;
306         u64 index;
307         u16 namelen;
308         char name[0];
309 };
310
311 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 {
313         return list_entry(entry, struct root_backref, list);
314 }
315
316 struct root_record {
317         struct list_head backrefs;
318         struct cache_extent cache;
319         unsigned int found_root_item:1;
320         u64 objectid;
321         u32 found_ref;
322 };
323
324 struct ptr_node {
325         struct cache_extent cache;
326         void *data;
327 };
328
329 struct shared_node {
330         struct cache_extent cache;
331         struct cache_tree root_cache;
332         struct cache_tree inode_cache;
333         struct inode_record *current;
334         u32 refs;
335 };
336
337 struct block_info {
338         u64 start;
339         u32 size;
340 };
341
342 struct walk_control {
343         struct cache_tree shared;
344         struct shared_node *nodes[BTRFS_MAX_LEVEL];
345         int active_node;
346         int root_level;
347 };
348
349 struct bad_item {
350         struct btrfs_key key;
351         u64 root_id;
352         struct list_head list;
353 };
354
355 struct extent_entry {
356         u64 bytenr;
357         u64 bytes;
358         int count;
359         int broken;
360         struct list_head list;
361 };
362
363 struct root_item_info {
364         /* level of the root */
365         u8 level;
366         /* number of nodes at this level, must be 1 for a root */
367         int node_count;
368         u64 bytenr;
369         u64 gen;
370         struct cache_extent cache_extent;
371 };
372
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the exact value yet; the bits are only used
 * internally to classify errors.  Every macro must be a distinct bit so that
 * two different errors can never be mistaken for each other.
 *
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH;
 * it now has its own, previously unused bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
389
390 static void *print_status_check(void *p)
391 {
392         struct task_ctx *priv = p;
393         const char work_indicator[] = { '.', 'o', 'O', 'o' };
394         uint32_t count = 0;
395         static char *task_position_string[] = {
396                 "checking extents",
397                 "checking free space cache",
398                 "checking fs roots",
399         };
400
401         task_period_start(priv->info, 1000 /* 1s */);
402
403         if (priv->tp == TASK_NOTHING)
404                 return NULL;
405
406         while (1) {
407                 printf("%s [%c]\r", task_position_string[priv->tp],
408                                 work_indicator[count % 4]);
409                 count++;
410                 fflush(stdout);
411                 task_period_wait(priv->info);
412         }
413         return NULL;
414 }
415
/*
 * Finish the status line by writing a newline to stdout and flushing it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	putchar('\n');
	fflush(stdout);

	return 0;
}
423
424 static enum btrfs_check_mode parse_check_mode(const char *str)
425 {
426         if (strcmp(str, "lowmem") == 0)
427                 return CHECK_MODE_LOWMEM;
428         if (strcmp(str, "orig") == 0)
429                 return CHECK_MODE_ORIGINAL;
430         if (strcmp(str, "original") == 0)
431                 return CHECK_MODE_ORIGINAL;
432
433         return CHECK_MODE_UNKNOWN;
434 }
435
436 /* Compatible function to allow reuse of old codes */
437 static u64 first_extent_gap(struct rb_root *holes)
438 {
439         struct file_extent_hole *hole;
440
441         if (RB_EMPTY_ROOT(holes))
442                 return (u64)-1;
443
444         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
445         return hole->start;
446 }
447
448 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 {
450         struct file_extent_hole *hole1;
451         struct file_extent_hole *hole2;
452
453         hole1 = rb_entry(node1, struct file_extent_hole, node);
454         hole2 = rb_entry(node2, struct file_extent_hole, node);
455
456         if (hole1->start > hole2->start)
457                 return -1;
458         if (hole1->start < hole2->start)
459                 return 1;
460         /* Now hole1->start == hole2->start */
461         if (hole1->len >= hole2->len)
462                 /*
463                  * Hole 1 will be merge center
464                  * Same hole will be merged later
465                  */
466                 return -1;
467         /* Hole 2 will be merge center */
468         return 1;
469 }
470
471 /*
472  * Add a hole to the record
473  *
474  * This will do hole merge for copy_file_extent_holes(),
475  * which will ensure there won't be continuous holes.
476  */
477 static int add_file_extent_hole(struct rb_root *holes,
478                                 u64 start, u64 len)
479 {
480         struct file_extent_hole *hole;
481         struct file_extent_hole *prev = NULL;
482         struct file_extent_hole *next = NULL;
483
484         hole = malloc(sizeof(*hole));
485         if (!hole)
486                 return -ENOMEM;
487         hole->start = start;
488         hole->len = len;
489         /* Since compare will not return 0, no -EEXIST will happen */
490         rb_insert(holes, &hole->node, compare_hole);
491
492         /* simple merge with previous hole */
493         if (rb_prev(&hole->node))
494                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495                                 node);
496         if (prev && prev->start + prev->len >= hole->start) {
497                 hole->len = hole->start + hole->len - prev->start;
498                 hole->start = prev->start;
499                 rb_erase(&prev->node, holes);
500                 free(prev);
501                 prev = NULL;
502         }
503
504         /* iterate merge with next holes */
505         while (1) {
506                 if (!rb_next(&hole->node))
507                         break;
508                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509                                         node);
510                 if (hole->start + hole->len >= next->start) {
511                         if (hole->start + hole->len <= next->start + next->len)
512                                 hole->len = next->start + next->len -
513                                             hole->start;
514                         rb_erase(&next->node, holes);
515                         free(next);
516                         next = NULL;
517                 } else
518                         break;
519         }
520         return 0;
521 }
522
523 static int compare_hole_range(struct rb_node *node, void *data)
524 {
525         struct file_extent_hole *hole;
526         u64 start;
527
528         hole = (struct file_extent_hole *)data;
529         start = hole->start;
530
531         hole = rb_entry(node, struct file_extent_hole, node);
532         if (start < hole->start)
533                 return -1;
534         if (start >= hole->start && start < hole->start + hole->len)
535                 return 0;
536         return 1;
537 }
538
539 /*
540  * Delete a hole in the record
541  *
542  * This will do the hole split and is much restrict than add.
543  */
544 static int del_file_extent_hole(struct rb_root *holes,
545                                 u64 start, u64 len)
546 {
547         struct file_extent_hole *hole;
548         struct file_extent_hole tmp;
549         u64 prev_start = 0;
550         u64 prev_len = 0;
551         u64 next_start = 0;
552         u64 next_len = 0;
553         struct rb_node *node;
554         int have_prev = 0;
555         int have_next = 0;
556         int ret = 0;
557
558         tmp.start = start;
559         tmp.len = len;
560         node = rb_search(holes, &tmp, compare_hole_range, NULL);
561         if (!node)
562                 return -EEXIST;
563         hole = rb_entry(node, struct file_extent_hole, node);
564         if (start + len > hole->start + hole->len)
565                 return -EEXIST;
566
567         /*
568          * Now there will be no overlap, delete the hole and re-add the
569          * split(s) if they exists.
570          */
571         if (start > hole->start) {
572                 prev_start = hole->start;
573                 prev_len = start - hole->start;
574                 have_prev = 1;
575         }
576         if (hole->start + hole->len > start + len) {
577                 next_start = start + len;
578                 next_len = hole->start + hole->len - start - len;
579                 have_next = 1;
580         }
581         rb_erase(node, holes);
582         free(hole);
583         if (have_prev) {
584                 ret = add_file_extent_hole(holes, prev_start, prev_len);
585                 if (ret < 0)
586                         return ret;
587         }
588         if (have_next) {
589                 ret = add_file_extent_hole(holes, next_start, next_len);
590                 if (ret < 0)
591                         return ret;
592         }
593         return 0;
594 }
595
596 static int copy_file_extent_holes(struct rb_root *dst,
597                                   struct rb_root *src)
598 {
599         struct file_extent_hole *hole;
600         struct rb_node *node;
601         int ret = 0;
602
603         node = rb_first(src);
604         while (node) {
605                 hole = rb_entry(node, struct file_extent_hole, node);
606                 ret = add_file_extent_hole(dst, hole->start, hole->len);
607                 if (ret)
608                         break;
609                 node = rb_next(node);
610         }
611         return ret;
612 }
613
614 static void free_file_extent_holes(struct rb_root *holes)
615 {
616         struct rb_node *node;
617         struct file_extent_hole *hole;
618
619         node = rb_first(holes);
620         while (node) {
621                 hole = rb_entry(node, struct file_extent_hole, node);
622                 rb_erase(node, holes);
623                 free(hole);
624                 node = rb_first(holes);
625         }
626 }
627
628 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629
630 static void record_root_in_trans(struct btrfs_trans_handle *trans,
631                                  struct btrfs_root *root)
632 {
633         if (root->last_trans != trans->transid) {
634                 root->track_dirty = 1;
635                 root->last_trans = trans->transid;
636                 root->commit_root = root->node;
637                 extent_buffer_get(root->node);
638         }
639 }
640
641 static u8 imode_to_type(u32 imode)
642 {
643 #define S_SHIFT 12
644         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
645                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
646                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
647                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
648                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
649                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
650                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
651                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
652         };
653
654         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
655 #undef S_SHIFT
656 }
657
658 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 {
660         struct device_record *rec1;
661         struct device_record *rec2;
662
663         rec1 = rb_entry(node1, struct device_record, node);
664         rec2 = rb_entry(node2, struct device_record, node);
665         if (rec1->devid > rec2->devid)
666                 return -1;
667         else if (rec1->devid < rec2->devid)
668                 return 1;
669         else
670                 return 0;
671 }
672
673 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 {
675         struct inode_record *rec;
676         struct inode_backref *backref;
677         struct inode_backref *orig;
678         struct inode_backref *tmp;
679         struct orphan_data_extent *src_orphan;
680         struct orphan_data_extent *dst_orphan;
681         struct rb_node *rb;
682         size_t size;
683         int ret;
684
685         rec = malloc(sizeof(*rec));
686         if (!rec)
687                 return ERR_PTR(-ENOMEM);
688         memcpy(rec, orig_rec, sizeof(*rec));
689         rec->refs = 1;
690         INIT_LIST_HEAD(&rec->backrefs);
691         INIT_LIST_HEAD(&rec->orphan_extents);
692         rec->holes = RB_ROOT;
693
694         list_for_each_entry(orig, &orig_rec->backrefs, list) {
695                 size = sizeof(*orig) + orig->namelen + 1;
696                 backref = malloc(size);
697                 if (!backref) {
698                         ret = -ENOMEM;
699                         goto cleanup;
700                 }
701                 memcpy(backref, orig, size);
702                 list_add_tail(&backref->list, &rec->backrefs);
703         }
704         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
705                 dst_orphan = malloc(sizeof(*dst_orphan));
706                 if (!dst_orphan) {
707                         ret = -ENOMEM;
708                         goto cleanup;
709                 }
710                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
711                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712         }
713         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
714         if (ret < 0)
715                 goto cleanup_rb;
716
717         return rec;
718
719 cleanup_rb:
720         rb = rb_first(&rec->holes);
721         while (rb) {
722                 struct file_extent_hole *hole;
723
724                 hole = rb_entry(rb, struct file_extent_hole, node);
725                 rb = rb_next(rb);
726                 free(hole);
727         }
728
729 cleanup:
730         if (!list_empty(&rec->backrefs))
731                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
732                         list_del(&orig->list);
733                         free(orig);
734                 }
735
736         if (!list_empty(&rec->orphan_extents))
737                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
738                         list_del(&orig->list);
739                         free(orig);
740                 }
741
742         free(rec);
743
744         return ERR_PTR(ret);
745 }
746
747 static void print_orphan_data_extents(struct list_head *orphan_extents,
748                                       u64 objectid)
749 {
750         struct orphan_data_extent *orphan;
751
752         if (list_empty(orphan_extents))
753                 return;
754         printf("The following data extent is lost in tree %llu:\n",
755                objectid);
756         list_for_each_entry(orphan, orphan_extents, list) {
757                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
758                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
759                        orphan->disk_len);
760         }
761 }
762
763 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 {
765         u64 root_objectid = root->root_key.objectid;
766         int errors = rec->errors;
767
768         if (!errors)
769                 return;
770         /* reloc root errors, we print its corresponding fs root objectid*/
771         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
772                 root_objectid = root->root_key.offset;
773                 fprintf(stderr, "reloc");
774         }
775         fprintf(stderr, "root %llu inode %llu errors %x",
776                 (unsigned long long) root_objectid,
777                 (unsigned long long) rec->ino, rec->errors);
778
779         if (errors & I_ERR_NO_INODE_ITEM)
780                 fprintf(stderr, ", no inode item");
781         if (errors & I_ERR_NO_ORPHAN_ITEM)
782                 fprintf(stderr, ", no orphan item");
783         if (errors & I_ERR_DUP_INODE_ITEM)
784                 fprintf(stderr, ", dup inode item");
785         if (errors & I_ERR_DUP_DIR_INDEX)
786                 fprintf(stderr, ", dup dir index");
787         if (errors & I_ERR_ODD_DIR_ITEM)
788                 fprintf(stderr, ", odd dir item");
789         if (errors & I_ERR_ODD_FILE_EXTENT)
790                 fprintf(stderr, ", odd file extent");
791         if (errors & I_ERR_BAD_FILE_EXTENT)
792                 fprintf(stderr, ", bad file extent");
793         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
794                 fprintf(stderr, ", file extent overlap");
795         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
796                 fprintf(stderr, ", file extent discount");
797         if (errors & I_ERR_DIR_ISIZE_WRONG)
798                 fprintf(stderr, ", dir isize wrong");
799         if (errors & I_ERR_FILE_NBYTES_WRONG)
800                 fprintf(stderr, ", nbytes wrong");
801         if (errors & I_ERR_ODD_CSUM_ITEM)
802                 fprintf(stderr, ", odd csum item");
803         if (errors & I_ERR_SOME_CSUM_MISSING)
804                 fprintf(stderr, ", some csum missing");
805         if (errors & I_ERR_LINK_COUNT_WRONG)
806                 fprintf(stderr, ", link count wrong");
807         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
808                 fprintf(stderr, ", orphan file extent");
809         fprintf(stderr, "\n");
810         /* Print the orphan extents if needed */
811         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
812                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813
814         /* Print the holes if needed */
815         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
816                 struct file_extent_hole *hole;
817                 struct rb_node *node;
818                 int found = 0;
819
820                 node = rb_first(&rec->holes);
821                 fprintf(stderr, "Found file extent holes:\n");
822                 while (node) {
823                         found = 1;
824                         hole = rb_entry(node, struct file_extent_hole, node);
825                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
826                                 hole->start, hole->len);
827                         node = rb_next(node);
828                 }
829                 if (!found)
830                         fprintf(stderr, "\tstart: 0, len: %llu\n",
831                                 round_up(rec->isize,
832                                          root->fs_info->sectorsize));
833         }
834 }
835
836 static void print_ref_error(int errors)
837 {
838         if (errors & REF_ERR_NO_DIR_ITEM)
839                 fprintf(stderr, ", no dir item");
840         if (errors & REF_ERR_NO_DIR_INDEX)
841                 fprintf(stderr, ", no dir index");
842         if (errors & REF_ERR_NO_INODE_REF)
843                 fprintf(stderr, ", no inode ref");
844         if (errors & REF_ERR_DUP_DIR_ITEM)
845                 fprintf(stderr, ", dup dir item");
846         if (errors & REF_ERR_DUP_DIR_INDEX)
847                 fprintf(stderr, ", dup dir index");
848         if (errors & REF_ERR_DUP_INODE_REF)
849                 fprintf(stderr, ", dup inode ref");
850         if (errors & REF_ERR_INDEX_UNMATCH)
851                 fprintf(stderr, ", index mismatch");
852         if (errors & REF_ERR_FILETYPE_UNMATCH)
853                 fprintf(stderr, ", filetype mismatch");
854         if (errors & REF_ERR_NAME_TOO_LONG)
855                 fprintf(stderr, ", name too long");
856         if (errors & REF_ERR_NO_ROOT_REF)
857                 fprintf(stderr, ", no root ref");
858         if (errors & REF_ERR_NO_ROOT_BACKREF)
859                 fprintf(stderr, ", no root backref");
860         if (errors & REF_ERR_DUP_ROOT_REF)
861                 fprintf(stderr, ", dup root ref");
862         if (errors & REF_ERR_DUP_ROOT_BACKREF)
863                 fprintf(stderr, ", dup root backref");
864         fprintf(stderr, "\n");
865 }
866
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
868                                           u64 ino, int mod)
869 {
870         struct ptr_node *node;
871         struct cache_extent *cache;
872         struct inode_record *rec = NULL;
873         int ret;
874
875         cache = lookup_cache_extent(inode_cache, ino, 1);
876         if (cache) {
877                 node = container_of(cache, struct ptr_node, cache);
878                 rec = node->data;
879                 if (mod && rec->refs > 1) {
880                         node->data = clone_inode_rec(rec);
881                         if (IS_ERR(node->data))
882                                 return node->data;
883                         rec->refs--;
884                         rec = node->data;
885                 }
886         } else if (mod) {
887                 rec = calloc(1, sizeof(*rec));
888                 if (!rec)
889                         return ERR_PTR(-ENOMEM);
890                 rec->ino = ino;
891                 rec->extent_start = (u64)-1;
892                 rec->refs = 1;
893                 INIT_LIST_HEAD(&rec->backrefs);
894                 INIT_LIST_HEAD(&rec->orphan_extents);
895                 rec->holes = RB_ROOT;
896
897                 node = malloc(sizeof(*node));
898                 if (!node) {
899                         free(rec);
900                         return ERR_PTR(-ENOMEM);
901                 }
902                 node->cache.start = ino;
903                 node->cache.size = 1;
904                 node->data = rec;
905
906                 if (ino == BTRFS_FREE_INO_OBJECTID)
907                         rec->found_link = 1;
908
909                 ret = insert_cache_extent(inode_cache, &node->cache);
910                 if (ret)
911                         return ERR_PTR(-EEXIST);
912         }
913         return rec;
914 }
915
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 {
918         struct orphan_data_extent *orphan;
919
920         while (!list_empty(orphan_extents)) {
921                 orphan = list_entry(orphan_extents->next,
922                                     struct orphan_data_extent, list);
923                 list_del(&orphan->list);
924                 free(orphan);
925         }
926 }
927
928 static void free_inode_rec(struct inode_record *rec)
929 {
930         struct inode_backref *backref;
931
932         if (--rec->refs > 0)
933                 return;
934
935         while (!list_empty(&rec->backrefs)) {
936                 backref = to_inode_backref(rec->backrefs.next);
937                 list_del(&backref->list);
938                 free(backref);
939         }
940         free_orphan_data_extents(&rec->orphan_extents);
941         free_file_extent_holes(&rec->holes);
942         free(rec);
943 }
944
945 static int can_free_inode_rec(struct inode_record *rec)
946 {
947         if (!rec->errors && rec->checked && rec->found_inode_item &&
948             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
949                 return 1;
950         return 0;
951 }
952
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954                                  struct inode_record *rec)
955 {
956         struct cache_extent *cache;
957         struct inode_backref *tmp, *backref;
958         struct ptr_node *node;
959         u8 filetype;
960
961         if (!rec->found_inode_item)
962                 return;
963
964         filetype = imode_to_type(rec->imode);
965         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966                 if (backref->found_dir_item && backref->found_dir_index) {
967                         if (backref->filetype != filetype)
968                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969                         if (!backref->errors && backref->found_inode_ref &&
970                             rec->nlink == rec->found_link) {
971                                 list_del(&backref->list);
972                                 free(backref);
973                         }
974                 }
975         }
976
977         if (!rec->checked || rec->merging)
978                 return;
979
980         if (S_ISDIR(rec->imode)) {
981                 if (rec->found_size != rec->isize)
982                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983                 if (rec->found_file_extent)
984                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
985         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986                 if (rec->found_dir_item)
987                         rec->errors |= I_ERR_ODD_DIR_ITEM;
988                 if (rec->found_size != rec->nbytes)
989                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990                 if (rec->nlink > 0 && !no_holes &&
991                     (rec->extent_end < rec->isize ||
992                      first_extent_gap(&rec->holes) < rec->isize))
993                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
994         }
995
996         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997                 if (rec->found_csum_item && rec->nodatasum)
998                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
999                 if (rec->some_csum_missing && !rec->nodatasum)
1000                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1001         }
1002
1003         BUG_ON(rec->refs != 1);
1004         if (can_free_inode_rec(rec)) {
1005                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006                 node = container_of(cache, struct ptr_node, cache);
1007                 BUG_ON(node->data != rec);
1008                 remove_cache_extent(inode_cache, &node->cache);
1009                 free(node);
1010                 free_inode_rec(rec);
1011         }
1012 }
1013
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 {
1016         struct btrfs_path path;
1017         struct btrfs_key key;
1018         int ret;
1019
1020         key.objectid = BTRFS_ORPHAN_OBJECTID;
1021         key.type = BTRFS_ORPHAN_ITEM_KEY;
1022         key.offset = ino;
1023
1024         btrfs_init_path(&path);
1025         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026         btrfs_release_path(&path);
1027         if (ret > 0)
1028                 ret = -ENOENT;
1029         return ret;
1030 }
1031
1032 static int process_inode_item(struct extent_buffer *eb,
1033                               int slot, struct btrfs_key *key,
1034                               struct shared_node *active_node)
1035 {
1036         struct inode_record *rec;
1037         struct btrfs_inode_item *item;
1038
1039         rec = active_node->current;
1040         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041         if (rec->found_inode_item) {
1042                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1043                 return 1;
1044         }
1045         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046         rec->nlink = btrfs_inode_nlink(eb, item);
1047         rec->isize = btrfs_inode_size(eb, item);
1048         rec->nbytes = btrfs_inode_nbytes(eb, item);
1049         rec->imode = btrfs_inode_mode(eb, item);
1050         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051                 rec->nodatasum = 1;
1052         rec->found_inode_item = 1;
1053         if (rec->nlink == 0)
1054                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055         maybe_free_inode_rec(&active_node->inode_cache, rec);
1056         return 0;
1057 }
1058
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060                                                 const char *name,
1061                                                 int namelen, u64 dir)
1062 {
1063         struct inode_backref *backref;
1064
1065         list_for_each_entry(backref, &rec->backrefs, list) {
1066                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067                         break;
1068                 if (backref->dir != dir || backref->namelen != namelen)
1069                         continue;
1070                 if (memcmp(name, backref->name, namelen))
1071                         continue;
1072                 return backref;
1073         }
1074
1075         backref = malloc(sizeof(*backref) + namelen + 1);
1076         if (!backref)
1077                 return NULL;
1078         memset(backref, 0, sizeof(*backref));
1079         backref->dir = dir;
1080         backref->namelen = namelen;
1081         memcpy(backref->name, name, namelen);
1082         backref->name[namelen] = '\0';
1083         list_add_tail(&backref->list, &rec->backrefs);
1084         return backref;
1085 }
1086
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088                              u64 ino, u64 dir, u64 index,
1089                              const char *name, int namelen,
1090                              u8 filetype, u8 itemtype, int errors)
1091 {
1092         struct inode_record *rec;
1093         struct inode_backref *backref;
1094
1095         rec = get_inode_rec(inode_cache, ino, 1);
1096         BUG_ON(IS_ERR(rec));
1097         backref = get_inode_backref(rec, name, namelen, dir);
1098         BUG_ON(!backref);
1099         if (errors)
1100                 backref->errors |= errors;
1101         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102                 if (backref->found_dir_index)
1103                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104                 if (backref->found_inode_ref && backref->index != index)
1105                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1106                 if (backref->found_dir_item && backref->filetype != filetype)
1107                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108
1109                 backref->index = index;
1110                 backref->filetype = filetype;
1111                 backref->found_dir_index = 1;
1112         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113                 rec->found_link++;
1114                 if (backref->found_dir_item)
1115                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116                 if (backref->found_dir_index && backref->filetype != filetype)
1117                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118
1119                 backref->filetype = filetype;
1120                 backref->found_dir_item = 1;
1121         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123                 if (backref->found_inode_ref)
1124                         backref->errors |= REF_ERR_DUP_INODE_REF;
1125                 if (backref->found_dir_index && backref->index != index)
1126                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1127                 else
1128                         backref->index = index;
1129
1130                 backref->ref_type = itemtype;
1131                 backref->found_inode_ref = 1;
1132         } else {
1133                 BUG_ON(1);
1134         }
1135
1136         maybe_free_inode_rec(inode_cache, rec);
1137         return 0;
1138 }
1139
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141                             struct cache_tree *dst_cache)
1142 {
1143         struct inode_backref *backref;
1144         u32 dir_count = 0;
1145         int ret = 0;
1146
1147         dst->merging = 1;
1148         list_for_each_entry(backref, &src->backrefs, list) {
1149                 if (backref->found_dir_index) {
1150                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1151                                         backref->index, backref->name,
1152                                         backref->namelen, backref->filetype,
1153                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1154                 }
1155                 if (backref->found_dir_item) {
1156                         dir_count++;
1157                         add_inode_backref(dst_cache, dst->ino,
1158                                         backref->dir, 0, backref->name,
1159                                         backref->namelen, backref->filetype,
1160                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1161                 }
1162                 if (backref->found_inode_ref) {
1163                         add_inode_backref(dst_cache, dst->ino,
1164                                         backref->dir, backref->index,
1165                                         backref->name, backref->namelen, 0,
1166                                         backref->ref_type, backref->errors);
1167                 }
1168         }
1169
1170         if (src->found_dir_item)
1171                 dst->found_dir_item = 1;
1172         if (src->found_file_extent)
1173                 dst->found_file_extent = 1;
1174         if (src->found_csum_item)
1175                 dst->found_csum_item = 1;
1176         if (src->some_csum_missing)
1177                 dst->some_csum_missing = 1;
1178         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1180                 if (ret < 0)
1181                         return ret;
1182         }
1183
1184         BUG_ON(src->found_link < dir_count);
1185         dst->found_link += src->found_link - dir_count;
1186         dst->found_size += src->found_size;
1187         if (src->extent_start != (u64)-1) {
1188                 if (dst->extent_start == (u64)-1) {
1189                         dst->extent_start = src->extent_start;
1190                         dst->extent_end = src->extent_end;
1191                 } else {
1192                         if (dst->extent_end > src->extent_start)
1193                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194                         else if (dst->extent_end < src->extent_start) {
1195                                 ret = add_file_extent_hole(&dst->holes,
1196                                         dst->extent_end,
1197                                         src->extent_start - dst->extent_end);
1198                         }
1199                         if (dst->extent_end < src->extent_end)
1200                                 dst->extent_end = src->extent_end;
1201                 }
1202         }
1203
1204         dst->errors |= src->errors;
1205         if (src->found_inode_item) {
1206                 if (!dst->found_inode_item) {
1207                         dst->nlink = src->nlink;
1208                         dst->isize = src->isize;
1209                         dst->nbytes = src->nbytes;
1210                         dst->imode = src->imode;
1211                         dst->nodatasum = src->nodatasum;
1212                         dst->found_inode_item = 1;
1213                 } else {
1214                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1215                 }
1216         }
1217         dst->merging = 0;
1218
1219         return 0;
1220 }
1221
1222 static int splice_shared_node(struct shared_node *src_node,
1223                               struct shared_node *dst_node)
1224 {
1225         struct cache_extent *cache;
1226         struct ptr_node *node, *ins;
1227         struct cache_tree *src, *dst;
1228         struct inode_record *rec, *conflict;
1229         u64 current_ino = 0;
1230         int splice = 0;
1231         int ret;
1232
1233         if (--src_node->refs == 0)
1234                 splice = 1;
1235         if (src_node->current)
1236                 current_ino = src_node->current->ino;
1237
1238         src = &src_node->root_cache;
1239         dst = &dst_node->root_cache;
1240 again:
1241         cache = search_cache_extent(src, 0);
1242         while (cache) {
1243                 node = container_of(cache, struct ptr_node, cache);
1244                 rec = node->data;
1245                 cache = next_cache_extent(cache);
1246
1247                 if (splice) {
1248                         remove_cache_extent(src, &node->cache);
1249                         ins = node;
1250                 } else {
1251                         ins = malloc(sizeof(*ins));
1252                         BUG_ON(!ins);
1253                         ins->cache.start = node->cache.start;
1254                         ins->cache.size = node->cache.size;
1255                         ins->data = rec;
1256                         rec->refs++;
1257                 }
1258                 ret = insert_cache_extent(dst, &ins->cache);
1259                 if (ret == -EEXIST) {
1260                         conflict = get_inode_rec(dst, rec->ino, 1);
1261                         BUG_ON(IS_ERR(conflict));
1262                         merge_inode_recs(rec, conflict, dst);
1263                         if (rec->checked) {
1264                                 conflict->checked = 1;
1265                                 if (dst_node->current == conflict)
1266                                         dst_node->current = NULL;
1267                         }
1268                         maybe_free_inode_rec(dst, conflict);
1269                         free_inode_rec(rec);
1270                         free(ins);
1271                 } else {
1272                         BUG_ON(ret);
1273                 }
1274         }
1275
1276         if (src == &src_node->root_cache) {
1277                 src = &src_node->inode_cache;
1278                 dst = &dst_node->inode_cache;
1279                 goto again;
1280         }
1281
1282         if (current_ino > 0 && (!dst_node->current ||
1283             current_ino > dst_node->current->ino)) {
1284                 if (dst_node->current) {
1285                         dst_node->current->checked = 1;
1286                         maybe_free_inode_rec(dst, dst_node->current);
1287                 }
1288                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289                 BUG_ON(IS_ERR(dst_node->current));
1290         }
1291         return 0;
1292 }
1293
1294 static void free_inode_ptr(struct cache_extent *cache)
1295 {
1296         struct ptr_node *node;
1297         struct inode_record *rec;
1298
1299         node = container_of(cache, struct ptr_node, cache);
1300         rec = node->data;
1301         free_inode_rec(rec);
1302         free(node);
1303 }
1304
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1308                                             u64 bytenr)
1309 {
1310         struct cache_extent *cache;
1311         struct shared_node *node;
1312
1313         cache = lookup_cache_extent(shared, bytenr, 1);
1314         if (cache) {
1315                 node = container_of(cache, struct shared_node, cache);
1316                 return node;
1317         }
1318         return NULL;
1319 }
1320
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1322 {
1323         int ret;
1324         struct shared_node *node;
1325
1326         node = calloc(1, sizeof(*node));
1327         if (!node)
1328                 return -ENOMEM;
1329         node->cache.start = bytenr;
1330         node->cache.size = 1;
1331         cache_tree_init(&node->root_cache);
1332         cache_tree_init(&node->inode_cache);
1333         node->refs = refs;
1334
1335         ret = insert_cache_extent(shared, &node->cache);
1336
1337         return ret;
1338 }
1339
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341                              struct walk_control *wc, int level)
1342 {
1343         struct shared_node *node;
1344         struct shared_node *dest;
1345         int ret;
1346
1347         if (level == wc->active_node)
1348                 return 0;
1349
1350         BUG_ON(wc->active_node <= level);
1351         node = find_shared_node(&wc->shared, bytenr);
1352         if (!node) {
1353                 ret = add_shared_node(&wc->shared, bytenr, refs);
1354                 BUG_ON(ret);
1355                 node = find_shared_node(&wc->shared, bytenr);
1356                 wc->nodes[level] = node;
1357                 wc->active_node = level;
1358                 return 0;
1359         }
1360
1361         if (wc->root_level == wc->active_node &&
1362             btrfs_root_refs(&root->root_item) == 0) {
1363                 if (--node->refs == 0) {
1364                         free_inode_recs_tree(&node->root_cache);
1365                         free_inode_recs_tree(&node->inode_cache);
1366                         remove_cache_extent(&wc->shared, &node->cache);
1367                         free(node);
1368                 }
1369                 return 1;
1370         }
1371
1372         dest = wc->nodes[wc->active_node];
1373         splice_shared_node(node, dest);
1374         if (node->refs == 0) {
1375                 remove_cache_extent(&wc->shared, &node->cache);
1376                 free(node);
1377         }
1378         return 1;
1379 }
1380
1381 static int leave_shared_node(struct btrfs_root *root,
1382                              struct walk_control *wc, int level)
1383 {
1384         struct shared_node *node;
1385         struct shared_node *dest;
1386         int i;
1387
1388         if (level == wc->root_level)
1389                 return 0;
1390
1391         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1392                 if (wc->nodes[i])
1393                         break;
1394         }
1395         BUG_ON(i >= BTRFS_MAX_LEVEL);
1396
1397         node = wc->nodes[wc->active_node];
1398         wc->nodes[wc->active_node] = NULL;
1399         wc->active_node = i;
1400
1401         dest = wc->nodes[wc->active_node];
1402         if (wc->active_node < wc->root_level ||
1403             btrfs_root_refs(&root->root_item) > 0) {
1404                 BUG_ON(node->refs <= 1);
1405                 splice_shared_node(node, dest);
1406         } else {
1407                 BUG_ON(node->refs < 2);
1408                 node->refs--;
1409         }
1410         return 0;
1411 }
1412
1413 /*
1414  * Returns:
1415  * < 0 - on error
1416  * 1   - if the root with id child_root_id is a child of root parent_root_id
1417  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1418  *       has other root(s) as parent(s)
1419  * 2   - if the root child_root_id doesn't have any parent roots
1420  */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1422                          u64 child_root_id)
1423 {
1424         struct btrfs_path path;
1425         struct btrfs_key key;
1426         struct extent_buffer *leaf;
1427         int has_parent = 0;
1428         int ret;
1429
1430         btrfs_init_path(&path);
1431
1432         key.objectid = parent_root_id;
1433         key.type = BTRFS_ROOT_REF_KEY;
1434         key.offset = child_root_id;
1435         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1436                                 0, 0);
1437         if (ret < 0)
1438                 return ret;
1439         btrfs_release_path(&path);
1440         if (!ret)
1441                 return 1;
1442
1443         key.objectid = child_root_id;
1444         key.type = BTRFS_ROOT_BACKREF_KEY;
1445         key.offset = 0;
1446         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1447                                 0, 0);
1448         if (ret < 0)
1449                 goto out;
1450
1451         while (1) {
1452                 leaf = path.nodes[0];
1453                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1455                         if (ret)
1456                                 break;
1457                         leaf = path.nodes[0];
1458                 }
1459
1460                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461                 if (key.objectid != child_root_id ||
1462                     key.type != BTRFS_ROOT_BACKREF_KEY)
1463                         break;
1464
1465                 has_parent = 1;
1466
1467                 if (key.offset == parent_root_id) {
1468                         btrfs_release_path(&path);
1469                         return 1;
1470                 }
1471
1472                 path.slots[0]++;
1473         }
1474 out:
1475         btrfs_release_path(&path);
1476         if (ret < 0)
1477                 return ret;
1478         return has_parent ? 0 : 2;
1479 }
1480
1481 static int process_dir_item(struct extent_buffer *eb,
1482                             int slot, struct btrfs_key *key,
1483                             struct shared_node *active_node)
1484 {
1485         u32 total;
1486         u32 cur = 0;
1487         u32 len;
1488         u32 name_len;
1489         u32 data_len;
1490         int error;
1491         int nritems = 0;
1492         u8 filetype;
1493         struct btrfs_dir_item *di;
1494         struct inode_record *rec;
1495         struct cache_tree *root_cache;
1496         struct cache_tree *inode_cache;
1497         struct btrfs_key location;
1498         char namebuf[BTRFS_NAME_LEN];
1499
1500         root_cache = &active_node->root_cache;
1501         inode_cache = &active_node->inode_cache;
1502         rec = active_node->current;
1503         rec->found_dir_item = 1;
1504
1505         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506         total = btrfs_item_size_nr(eb, slot);
1507         while (cur < total) {
1508                 nritems++;
1509                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510                 name_len = btrfs_dir_name_len(eb, di);
1511                 data_len = btrfs_dir_data_len(eb, di);
1512                 filetype = btrfs_dir_type(eb, di);
1513
1514                 rec->found_size += name_len;
1515                 if (cur + sizeof(*di) + name_len > total ||
1516                     name_len > BTRFS_NAME_LEN) {
1517                         error = REF_ERR_NAME_TOO_LONG;
1518
1519                         if (cur + sizeof(*di) > total)
1520                                 break;
1521                         len = min_t(u32, total - cur - sizeof(*di),
1522                                     BTRFS_NAME_LEN);
1523                 } else {
1524                         len = name_len;
1525                         error = 0;
1526                 }
1527
1528                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1529
1530                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1531                     key->offset != btrfs_name_hash(namebuf, len)) {
1532                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1533                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1534                         key->objectid, key->offset, namebuf, len, filetype,
1535                         key->offset, btrfs_name_hash(namebuf, len));
1536                 }
1537
1538                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1539                         add_inode_backref(inode_cache, location.objectid,
1540                                           key->objectid, key->offset, namebuf,
1541                                           len, filetype, key->type, error);
1542                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1543                         add_inode_backref(root_cache, location.objectid,
1544                                           key->objectid, key->offset,
1545                                           namebuf, len, filetype,
1546                                           key->type, error);
1547                 } else {
1548                         fprintf(stderr, "invalid location in dir item %u\n",
1549                                 location.type);
1550                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1551                                           key->objectid, key->offset, namebuf,
1552                                           len, filetype, key->type, error);
1553                 }
1554
1555                 len = sizeof(*di) + name_len + data_len;
1556                 di = (struct btrfs_dir_item *)((char *)di + len);
1557                 cur += len;
1558         }
1559         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1560                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1561
1562         return 0;
1563 }
1564
1565 static int process_inode_ref(struct extent_buffer *eb,
1566                              int slot, struct btrfs_key *key,
1567                              struct shared_node *active_node)
1568 {
1569         u32 total;
1570         u32 cur = 0;
1571         u32 len;
1572         u32 name_len;
1573         u64 index;
1574         int error;
1575         struct cache_tree *inode_cache;
1576         struct btrfs_inode_ref *ref;
1577         char namebuf[BTRFS_NAME_LEN];
1578
1579         inode_cache = &active_node->inode_cache;
1580
1581         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1582         total = btrfs_item_size_nr(eb, slot);
1583         while (cur < total) {
1584                 name_len = btrfs_inode_ref_name_len(eb, ref);
1585                 index = btrfs_inode_ref_index(eb, ref);
1586
1587                 /* inode_ref + namelen should not cross item boundary */
1588                 if (cur + sizeof(*ref) + name_len > total ||
1589                     name_len > BTRFS_NAME_LEN) {
1590                         if (total < cur + sizeof(*ref))
1591                                 break;
1592
1593                         /* Still try to read out the remaining part */
1594                         len = min_t(u32, total - cur - sizeof(*ref),
1595                                     BTRFS_NAME_LEN);
1596                         error = REF_ERR_NAME_TOO_LONG;
1597                 } else {
1598                         len = name_len;
1599                         error = 0;
1600                 }
1601
1602                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1603                 add_inode_backref(inode_cache, key->objectid, key->offset,
1604                                   index, namebuf, len, 0, key->type, error);
1605
1606                 len = sizeof(*ref) + name_len;
1607                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1608                 cur += len;
1609         }
1610         return 0;
1611 }
1612
1613 static int process_inode_extref(struct extent_buffer *eb,
1614                                 int slot, struct btrfs_key *key,
1615                                 struct shared_node *active_node)
1616 {
1617         u32 total;
1618         u32 cur = 0;
1619         u32 len;
1620         u32 name_len;
1621         u64 index;
1622         u64 parent;
1623         int error;
1624         struct cache_tree *inode_cache;
1625         struct btrfs_inode_extref *extref;
1626         char namebuf[BTRFS_NAME_LEN];
1627
1628         inode_cache = &active_node->inode_cache;
1629
1630         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1631         total = btrfs_item_size_nr(eb, slot);
1632         while (cur < total) {
1633                 name_len = btrfs_inode_extref_name_len(eb, extref);
1634                 index = btrfs_inode_extref_index(eb, extref);
1635                 parent = btrfs_inode_extref_parent(eb, extref);
1636                 if (name_len <= BTRFS_NAME_LEN) {
1637                         len = name_len;
1638                         error = 0;
1639                 } else {
1640                         len = BTRFS_NAME_LEN;
1641                         error = REF_ERR_NAME_TOO_LONG;
1642                 }
1643                 read_extent_buffer(eb, namebuf,
1644                                    (unsigned long)(extref + 1), len);
1645                 add_inode_backref(inode_cache, key->objectid, parent,
1646                                   index, namebuf, len, 0, key->type, error);
1647
1648                 len = sizeof(*extref) + name_len;
1649                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1650                 cur += len;
1651         }
1652         return 0;
1653
1654 }
1655
1656 static int count_csum_range(struct btrfs_root *root, u64 start,
1657                             u64 len, u64 *found)
1658 {
1659         struct btrfs_key key;
1660         struct btrfs_path path;
1661         struct extent_buffer *leaf;
1662         int ret;
1663         size_t size;
1664         *found = 0;
1665         u64 csum_end;
1666         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1667
1668         btrfs_init_path(&path);
1669
1670         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1671         key.offset = start;
1672         key.type = BTRFS_EXTENT_CSUM_KEY;
1673
1674         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1675                                 &key, &path, 0, 0);
1676         if (ret < 0)
1677                 goto out;
1678         if (ret > 0 && path.slots[0] > 0) {
1679                 leaf = path.nodes[0];
1680                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1681                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1682                     key.type == BTRFS_EXTENT_CSUM_KEY)
1683                         path.slots[0]--;
1684         }
1685
1686         while (len > 0) {
1687                 leaf = path.nodes[0];
1688                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1689                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1690                         if (ret > 0)
1691                                 break;
1692                         else if (ret < 0)
1693                                 goto out;
1694                         leaf = path.nodes[0];
1695                 }
1696
1697                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1698                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1699                     key.type != BTRFS_EXTENT_CSUM_KEY)
1700                         break;
1701
1702                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1703                 if (key.offset >= start + len)
1704                         break;
1705
1706                 if (key.offset > start)
1707                         start = key.offset;
1708
1709                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1710                 csum_end = key.offset + (size / csum_size) *
1711                            root->fs_info->sectorsize;
1712                 if (csum_end > start) {
1713                         size = min(csum_end - start, len);
1714                         len -= size;
1715                         start += size;
1716                         *found += size;
1717                 }
1718
1719                 path.slots[0]++;
1720         }
1721 out:
1722         btrfs_release_path(&path);
1723         if (ret < 0)
1724                 return ret;
1725         return 0;
1726 }
1727
1728 static int process_file_extent(struct btrfs_root *root,
1729                                 struct extent_buffer *eb,
1730                                 int slot, struct btrfs_key *key,
1731                                 struct shared_node *active_node)
1732 {
1733         struct inode_record *rec;
1734         struct btrfs_file_extent_item *fi;
1735         u64 num_bytes = 0;
1736         u64 disk_bytenr = 0;
1737         u64 extent_offset = 0;
1738         u64 mask = root->fs_info->sectorsize - 1;
1739         int extent_type;
1740         int ret;
1741
1742         rec = active_node->current;
1743         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1744         rec->found_file_extent = 1;
1745
1746         if (rec->extent_start == (u64)-1) {
1747                 rec->extent_start = key->offset;
1748                 rec->extent_end = key->offset;
1749         }
1750
1751         if (rec->extent_end > key->offset)
1752                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1753         else if (rec->extent_end < key->offset) {
1754                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1755                                            key->offset - rec->extent_end);
1756                 if (ret < 0)
1757                         return ret;
1758         }
1759
1760         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1761         extent_type = btrfs_file_extent_type(eb, fi);
1762
1763         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1764                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1765                 if (num_bytes == 0)
1766                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1767                 rec->found_size += num_bytes;
1768                 num_bytes = (num_bytes + mask) & ~mask;
1769         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1770                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1771                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1772                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1773                 extent_offset = btrfs_file_extent_offset(eb, fi);
1774                 if (num_bytes == 0 || (num_bytes & mask))
1775                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1776                 if (num_bytes + extent_offset >
1777                     btrfs_file_extent_ram_bytes(eb, fi))
1778                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1779                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1780                     (btrfs_file_extent_compression(eb, fi) ||
1781                      btrfs_file_extent_encryption(eb, fi) ||
1782                      btrfs_file_extent_other_encoding(eb, fi)))
1783                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1784                 if (disk_bytenr > 0)
1785                         rec->found_size += num_bytes;
1786         } else {
1787                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1788         }
1789         rec->extent_end = key->offset + num_bytes;
1790
1791         /*
1792          * The data reloc tree will copy full extents into its inode and then
1793          * copy the corresponding csums.  Because the extent it copied could be
1794          * a preallocated extent that hasn't been written to yet there may be no
1795          * csums to copy, ergo we won't have csums for our file extent.  This is
1796          * ok so just don't bother checking csums if the inode belongs to the
1797          * data reloc tree.
1798          */
1799         if (disk_bytenr > 0 &&
1800             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1801                 u64 found;
1802                 if (btrfs_file_extent_compression(eb, fi))
1803                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1804                 else
1805                         disk_bytenr += extent_offset;
1806
1807                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1808                 if (ret < 0)
1809                         return ret;
1810                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1811                         if (found > 0)
1812                                 rec->found_csum_item = 1;
1813                         if (found < num_bytes)
1814                                 rec->some_csum_missing = 1;
1815                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1816                         if (found > 0)
1817                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1818                 }
1819         }
1820         return 0;
1821 }
1822
1823 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1824                             struct walk_control *wc)
1825 {
1826         struct btrfs_key key;
1827         u32 nritems;
1828         int i;
1829         int ret = 0;
1830         struct cache_tree *inode_cache;
1831         struct shared_node *active_node;
1832
1833         if (wc->root_level == wc->active_node &&
1834             btrfs_root_refs(&root->root_item) == 0)
1835                 return 0;
1836
1837         active_node = wc->nodes[wc->active_node];
1838         inode_cache = &active_node->inode_cache;
1839         nritems = btrfs_header_nritems(eb);
1840         for (i = 0; i < nritems; i++) {
1841                 btrfs_item_key_to_cpu(eb, &key, i);
1842
1843                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1844                         continue;
1845                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1846                         continue;
1847
1848                 if (active_node->current == NULL ||
1849                     active_node->current->ino < key.objectid) {
1850                         if (active_node->current) {
1851                                 active_node->current->checked = 1;
1852                                 maybe_free_inode_rec(inode_cache,
1853                                                      active_node->current);
1854                         }
1855                         active_node->current = get_inode_rec(inode_cache,
1856                                                              key.objectid, 1);
1857                         BUG_ON(IS_ERR(active_node->current));
1858                 }
1859                 switch (key.type) {
1860                 case BTRFS_DIR_ITEM_KEY:
1861                 case BTRFS_DIR_INDEX_KEY:
1862                         ret = process_dir_item(eb, i, &key, active_node);
1863                         break;
1864                 case BTRFS_INODE_REF_KEY:
1865                         ret = process_inode_ref(eb, i, &key, active_node);
1866                         break;
1867                 case BTRFS_INODE_EXTREF_KEY:
1868                         ret = process_inode_extref(eb, i, &key, active_node);
1869                         break;
1870                 case BTRFS_INODE_ITEM_KEY:
1871                         ret = process_inode_item(eb, i, &key, active_node);
1872                         break;
1873                 case BTRFS_EXTENT_DATA_KEY:
1874                         ret = process_file_extent(root, eb, i, &key,
1875                                                   active_node);
1876                         break;
1877                 default:
1878                         break;
1879                 };
1880         }
1881         return ret;
1882 }
1883
/*
 * Per-level cache of extent information for the tree blocks on the path
 * currently being walked.  Every array is indexed by tree level.
 */
struct node_refs {
	/* bytenr of the block whose information is cached for the level */
	u64 bytenr[BTRFS_MAX_LEVEL];
	/* reference count looked up for that block */
	u64 refs[BTRFS_MAX_LEVEL];
	/* non-zero if that block has to be checked through the current root */
	int need_check[BTRFS_MAX_LEVEL];
};
1889
/* Forward declarations: process_one_leaf_v2() calls both before they are defined. */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
			     struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
			    unsigned int ext_ref);
1894
1895 /*
1896  * Returns >0  Found error, not fatal, should continue
1897  * Returns <0  Fatal error, must exit the whole check
1898  * Returns 0   No errors found
1899  */
1900 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1901                                struct node_refs *nrefs, int *level, int ext_ref)
1902 {
1903         struct extent_buffer *cur = path->nodes[0];
1904         struct btrfs_key key;
1905         u64 cur_bytenr;
1906         u32 nritems;
1907         u64 first_ino = 0;
1908         int root_level = btrfs_header_level(root->node);
1909         int i;
1910         int ret = 0; /* Final return value */
1911         int err = 0; /* Positive error bitmap */
1912
1913         cur_bytenr = cur->start;
1914
1915         /* skip to first inode item or the first inode number change */
1916         nritems = btrfs_header_nritems(cur);
1917         for (i = 0; i < nritems; i++) {
1918                 btrfs_item_key_to_cpu(cur, &key, i);
1919                 if (i == 0)
1920                         first_ino = key.objectid;
1921                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1922                     (first_ino && first_ino != key.objectid))
1923                         break;
1924         }
1925         if (i == nritems) {
1926                 path->slots[0] = nritems;
1927                 return 0;
1928         }
1929         path->slots[0] = i;
1930
1931 again:
1932         err |= check_inode_item(root, path, ext_ref);
1933
1934         if (err & LAST_ITEM)
1935                 goto out;
1936
1937         /* still have inode items in thie leaf */
1938         if (cur->start == cur_bytenr)
1939                 goto again;
1940
1941         /*
1942          * we have switched to another leaf, above nodes may
1943          * have changed, here walk down the path, if a node
1944          * or leaf is shared, check whether we can skip this
1945          * node or leaf.
1946          */
1947         for (i = root_level; i >= 0; i--) {
1948                 if (path->nodes[i]->start == nrefs->bytenr[i])
1949                         continue;
1950
1951                 ret = update_nodes_refs(root,
1952                                 path->nodes[i]->start,
1953                                 nrefs, i);
1954                 if (ret)
1955                         goto out;
1956
1957                 if (!nrefs->need_check[i]) {
1958                         *level += 1;
1959                         break;
1960                 }
1961         }
1962
1963         for (i = 0; i < *level; i++) {
1964                 free_extent_buffer(path->nodes[i]);
1965                 path->nodes[i] = NULL;
1966         }
1967 out:
1968         err &= ~LAST_ITEM;
1969         if (err && !ret)
1970                 ret = err;
1971         return ret;
1972 }
1973
1974 static void reada_walk_down(struct btrfs_root *root,
1975                             struct extent_buffer *node, int slot)
1976 {
1977         struct btrfs_fs_info *fs_info = root->fs_info;
1978         u64 bytenr;
1979         u64 ptr_gen;
1980         u32 nritems;
1981         int i;
1982         int level;
1983
1984         level = btrfs_header_level(node);
1985         if (level != 1)
1986                 return;
1987
1988         nritems = btrfs_header_nritems(node);
1989         for (i = slot; i < nritems; i++) {
1990                 bytenr = btrfs_node_blockptr(node, i);
1991                 ptr_gen = btrfs_node_ptr_generation(node, i);
1992                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1993         }
1994 }
1995
1996 /*
1997  * Check the child node/leaf by the following condition:
1998  * 1. the first item key of the node/leaf should be the same with the one
1999  *    in parent.
2000  * 2. block in parent node should match the child node/leaf.
2001  * 3. generation of parent node and child's header should be consistent.
2002  *
2003  * Or the child node/leaf pointed by the key in parent is not valid.
2004  *
2005  * We hope to check leaf owner too, but since subvol may share leaves,
2006  * which makes leaf owner check not so strong, key check should be
2007  * sufficient enough for that case.
2008  */
2009 static int check_child_node(struct extent_buffer *parent, int slot,
2010                             struct extent_buffer *child)
2011 {
2012         struct btrfs_key parent_key;
2013         struct btrfs_key child_key;
2014         int ret = 0;
2015
2016         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2017         if (btrfs_header_level(child) == 0)
2018                 btrfs_item_key_to_cpu(child, &child_key, 0);
2019         else
2020                 btrfs_node_key_to_cpu(child, &child_key, 0);
2021
2022         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2023                 ret = -EINVAL;
2024                 fprintf(stderr,
2025                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2026                         parent_key.objectid, parent_key.type, parent_key.offset,
2027                         child_key.objectid, child_key.type, child_key.offset);
2028         }
2029         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2030                 ret = -EINVAL;
2031                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2032                         btrfs_node_blockptr(parent, slot),
2033                         btrfs_header_bytenr(child));
2034         }
2035         if (btrfs_node_ptr_generation(parent, slot) !=
2036             btrfs_header_generation(child)) {
2037                 ret = -EINVAL;
2038                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2039                         btrfs_header_generation(child),
2040                         btrfs_node_ptr_generation(parent, slot));
2041         }
2042         return ret;
2043 }
2044
2045 /*
2046  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2047  * in every fs or file tree check. Here we find its all root ids, and only check
2048  * it in the fs or file tree which has the smallest root id.
2049  */
2050 static int need_check(struct btrfs_root *root, struct ulist *roots)
2051 {
2052         struct rb_node *node;
2053         struct ulist_node *u;
2054
2055         if (roots->nnodes == 1)
2056                 return 1;
2057
2058         node = rb_first(&roots->root);
2059         u = rb_entry(node, struct ulist_node, rb_node);
2060         /*
2061          * current root id is not smallest, we skip it and let it be checked
2062          * in the fs or file tree who hash the smallest root id.
2063          */
2064         if (root->objectid != u->val)
2065                 return 0;
2066
2067         return 1;
2068 }
2069
2070 /*
2071  * for a tree node or leaf, we record its reference count, so later if we still
2072  * process this node or leaf, don't need to compute its reference count again.
2073  */
2074 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2075                              struct node_refs *nrefs, u64 level)
2076 {
2077         int check, ret;
2078         u64 refs;
2079         struct ulist *roots;
2080
2081         if (nrefs->bytenr[level] != bytenr) {
2082                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2083                                        level, 1, &refs, NULL);
2084                 if (ret < 0)
2085                         return ret;
2086
2087                 nrefs->bytenr[level] = bytenr;
2088                 nrefs->refs[level] = refs;
2089                 if (refs > 1) {
2090                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2091                                                    0, &roots);
2092                         if (ret)
2093                                 return -EIO;
2094
2095                         check = need_check(root, roots);
2096                         ulist_free(roots);
2097                         nrefs->need_check[level] = check;
2098                 } else {
2099                         nrefs->need_check[level] = 1;
2100                 }
2101         }
2102
2103         return 0;
2104 }
2105
2106 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2107                           struct walk_control *wc, int *level,
2108                           struct node_refs *nrefs)
2109 {
2110         enum btrfs_tree_block_status status;
2111         u64 bytenr;
2112         u64 ptr_gen;
2113         struct btrfs_fs_info *fs_info = root->fs_info;
2114         struct extent_buffer *next;
2115         struct extent_buffer *cur;
2116         int ret, err = 0;
2117         u64 refs;
2118
2119         WARN_ON(*level < 0);
2120         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2121
2122         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2123                 refs = nrefs->refs[*level];
2124                 ret = 0;
2125         } else {
2126                 ret = btrfs_lookup_extent_info(NULL, root,
2127                                        path->nodes[*level]->start,
2128                                        *level, 1, &refs, NULL);
2129                 if (ret < 0) {
2130                         err = ret;
2131                         goto out;
2132                 }
2133                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2134                 nrefs->refs[*level] = refs;
2135         }
2136
2137         if (refs > 1) {
2138                 ret = enter_shared_node(root, path->nodes[*level]->start,
2139                                         refs, wc, *level);
2140                 if (ret > 0) {
2141                         err = ret;
2142                         goto out;
2143                 }
2144         }
2145
2146         while (*level >= 0) {
2147                 WARN_ON(*level < 0);
2148                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2149                 cur = path->nodes[*level];
2150
2151                 if (btrfs_header_level(cur) != *level)
2152                         WARN_ON(1);
2153
2154                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2155                         break;
2156                 if (*level == 0) {
2157                         ret = process_one_leaf(root, cur, wc);
2158                         if (ret < 0)
2159                                 err = ret;
2160                         break;
2161                 }
2162                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2163                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2164
2165                 if (bytenr == nrefs->bytenr[*level - 1]) {
2166                         refs = nrefs->refs[*level - 1];
2167                 } else {
2168                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2169                                         *level - 1, 1, &refs, NULL);
2170                         if (ret < 0) {
2171                                 refs = 0;
2172                         } else {
2173                                 nrefs->bytenr[*level - 1] = bytenr;
2174                                 nrefs->refs[*level - 1] = refs;
2175                         }
2176                 }
2177
2178                 if (refs > 1) {
2179                         ret = enter_shared_node(root, bytenr, refs,
2180                                                 wc, *level - 1);
2181                         if (ret > 0) {
2182                                 path->slots[*level]++;
2183                                 continue;
2184                         }
2185                 }
2186
2187                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2188                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2189                         free_extent_buffer(next);
2190                         reada_walk_down(root, cur, path->slots[*level]);
2191                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2192                         if (!extent_buffer_uptodate(next)) {
2193                                 struct btrfs_key node_key;
2194
2195                                 btrfs_node_key_to_cpu(path->nodes[*level],
2196                                                       &node_key,
2197                                                       path->slots[*level]);
2198                                 btrfs_add_corrupt_extent_record(root->fs_info,
2199                                                 &node_key,
2200                                                 path->nodes[*level]->start,
2201                                                 root->fs_info->nodesize,
2202                                                 *level);
2203                                 err = -EIO;
2204                                 goto out;
2205                         }
2206                 }
2207
2208                 ret = check_child_node(cur, path->slots[*level], next);
2209                 if (ret) {
2210                         free_extent_buffer(next);
2211                         err = ret;
2212                         goto out;
2213                 }
2214
2215                 if (btrfs_is_leaf(next))
2216                         status = btrfs_check_leaf(root, NULL, next);
2217                 else
2218                         status = btrfs_check_node(root, NULL, next);
2219                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2220                         free_extent_buffer(next);
2221                         err = -EIO;
2222                         goto out;
2223                 }
2224
2225                 *level = *level - 1;
2226                 free_extent_buffer(path->nodes[*level]);
2227                 path->nodes[*level] = next;
2228                 path->slots[*level] = 0;
2229         }
2230 out:
2231         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2232         return err;
2233 }
2234
/*
 * Forward declaration of check_inode_item().
 * NOTE(review): an identical prototype already appears earlier in this file,
 * so this repetition looks redundant.
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
			    unsigned int ext_ref);
2237
2238 /*
2239  * Returns >0  Found error, should continue
2240  * Returns <0  Fatal error, must exit the whole check
2241  * Returns 0   No errors found
2242  */
2243 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2244                              int *level, struct node_refs *nrefs, int ext_ref)
2245 {
2246         enum btrfs_tree_block_status status;
2247         u64 bytenr;
2248         u64 ptr_gen;
2249         struct btrfs_fs_info *fs_info = root->fs_info;
2250         struct extent_buffer *next;
2251         struct extent_buffer *cur;
2252         int ret;
2253
2254         WARN_ON(*level < 0);
2255         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2256
2257         ret = update_nodes_refs(root, path->nodes[*level]->start,
2258                                 nrefs, *level);
2259         if (ret < 0)
2260                 return ret;
2261
2262         while (*level >= 0) {
2263                 WARN_ON(*level < 0);
2264                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2265                 cur = path->nodes[*level];
2266
2267                 if (btrfs_header_level(cur) != *level)
2268                         WARN_ON(1);
2269
2270                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2271                         break;
2272                 /* Don't forgot to check leaf/node validation */
2273                 if (*level == 0) {
2274                         ret = btrfs_check_leaf(root, NULL, cur);
2275                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2276                                 ret = -EIO;
2277                                 break;
2278                         }
2279                         ret = process_one_leaf_v2(root, path, nrefs,
2280                                                   level, ext_ref);
2281                         break;
2282                 } else {
2283                         ret = btrfs_check_node(root, NULL, cur);
2284                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2285                                 ret = -EIO;
2286                                 break;
2287                         }
2288                 }
2289                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2290                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2291
2292                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2293                 if (ret)
2294                         break;
2295                 if (!nrefs->need_check[*level - 1]) {
2296                         path->slots[*level]++;
2297                         continue;
2298                 }
2299
2300                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2301                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2302                         free_extent_buffer(next);
2303                         reada_walk_down(root, cur, path->slots[*level]);
2304                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2305                         if (!extent_buffer_uptodate(next)) {
2306                                 struct btrfs_key node_key;
2307
2308                                 btrfs_node_key_to_cpu(path->nodes[*level],
2309                                                       &node_key,
2310                                                       path->slots[*level]);
2311                                 btrfs_add_corrupt_extent_record(fs_info,
2312                                                 &node_key,
2313                                                 path->nodes[*level]->start,
2314                                                 fs_info->nodesize,
2315                                                 *level);
2316                                 ret = -EIO;
2317                                 break;
2318                         }
2319                 }
2320
2321                 ret = check_child_node(cur, path->slots[*level], next);
2322                 if (ret < 0) 
2323                         break;
2324
2325                 if (btrfs_is_leaf(next))
2326                         status = btrfs_check_leaf(root, NULL, next);
2327                 else
2328                         status = btrfs_check_node(root, NULL, next);
2329                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2330                         free_extent_buffer(next);
2331                         ret = -EIO;
2332                         break;
2333                 }
2334
2335                 *level = *level - 1;
2336                 free_extent_buffer(path->nodes[*level]);
2337                 path->nodes[*level] = next;
2338                 path->slots[*level] = 0;
2339         }
2340         return ret;
2341 }
2342
2343 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2344                         struct walk_control *wc, int *level)
2345 {
2346         int i;
2347         struct extent_buffer *leaf;
2348
2349         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2350                 leaf = path->nodes[i];
2351                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2352                         path->slots[i]++;
2353                         *level = i;
2354                         return 0;
2355                 } else {
2356                         free_extent_buffer(path->nodes[*level]);
2357                         path->nodes[*level] = NULL;
2358                         BUG_ON(*level > wc->active_node);
2359                         if (*level == wc->active_node)
2360                                 leave_shared_node(root, wc, *level);
2361                         *level = i + 1;
2362                 }
2363         }
2364         return 1;
2365 }
2366
2367 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2368                            int *level)
2369 {
2370         int i;
2371         struct extent_buffer *leaf;
2372
2373         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2374                 leaf = path->nodes[i];
2375                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2376                         path->slots[i]++;
2377                         *level = i;
2378                         return 0;
2379                 } else {
2380                         free_extent_buffer(path->nodes[*level]);
2381                         path->nodes[*level] = NULL;
2382                         *level = i + 1;
2383                 }
2384         }
2385         return 1;
2386 }
2387
2388 static int check_root_dir(struct inode_record *rec)
2389 {
2390         struct inode_backref *backref;
2391         int ret = -1;
2392
2393         if (!rec->found_inode_item || rec->errors)
2394                 goto out;
2395         if (rec->nlink != 1 || rec->found_link != 0)
2396                 goto out;
2397         if (list_empty(&rec->backrefs))
2398                 goto out;
2399         backref = to_inode_backref(rec->backrefs.next);
2400         if (!backref->found_inode_ref)
2401                 goto out;
2402         if (backref->index != 0 || backref->namelen != 2 ||
2403             memcmp(backref->name, "..", 2))
2404                 goto out;
2405         if (backref->found_dir_index || backref->found_dir_item)
2406                 goto out;
2407         ret = 0;
2408 out:
2409         return ret;
2410 }
2411
2412 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2413                               struct btrfs_root *root, struct btrfs_path *path,
2414                               struct inode_record *rec)
2415 {
2416         struct btrfs_inode_item *ei;
2417         struct btrfs_key key;
2418         int ret;
2419
2420         key.objectid = rec->ino;
2421         key.type = BTRFS_INODE_ITEM_KEY;
2422         key.offset = (u64)-1;
2423
2424         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2425         if (ret < 0)
2426                 goto out;
2427         if (ret) {
2428                 if (!path->slots[0]) {
2429                         ret = -ENOENT;
2430                         goto out;
2431                 }
2432                 path->slots[0]--;
2433                 ret = 0;
2434         }
2435         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2436         if (key.objectid != rec->ino) {
2437                 ret = -ENOENT;
2438                 goto out;
2439         }
2440
2441         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2442                             struct btrfs_inode_item);
2443         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2444         btrfs_mark_buffer_dirty(path->nodes[0]);
2445         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2446         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2447                root->root_key.objectid);
2448 out:
2449         btrfs_release_path(path);
2450         return ret;
2451 }
2452
2453 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2454                                     struct btrfs_root *root,
2455                                     struct btrfs_path *path,
2456                                     struct inode_record *rec)
2457 {
2458         int ret;
2459
2460         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2461         btrfs_release_path(path);
2462         if (!ret)
2463                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2464         return ret;
2465 }
2466
2467 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2468                                struct btrfs_root *root,
2469                                struct btrfs_path *path,
2470                                struct inode_record *rec)
2471 {
2472         struct btrfs_inode_item *ei;
2473         struct btrfs_key key;
2474         int ret = 0;
2475
2476         key.objectid = rec->ino;
2477         key.type = BTRFS_INODE_ITEM_KEY;
2478         key.offset = 0;
2479
2480         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2481         if (ret) {
2482                 if (ret > 0)
2483                         ret = -ENOENT;
2484                 goto out;
2485         }
2486
2487         /* Since ret == 0, no need to check anything */
2488         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2489                             struct btrfs_inode_item);
2490         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2491         btrfs_mark_buffer_dirty(path->nodes[0]);
2492         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2493         printf("reset nbytes for ino %llu root %llu\n",
2494                rec->ino, root->root_key.objectid);
2495 out:
2496         btrfs_release_path(path);
2497         return ret;
2498 }
2499
2500 static int add_missing_dir_index(struct btrfs_root *root,
2501                                  struct cache_tree *inode_cache,
2502                                  struct inode_record *rec,
2503                                  struct inode_backref *backref)
2504 {
2505         struct btrfs_path path;
2506         struct btrfs_trans_handle *trans;
2507         struct btrfs_dir_item *dir_item;
2508         struct extent_buffer *leaf;
2509         struct btrfs_key key;
2510         struct btrfs_disk_key disk_key;
2511         struct inode_record *dir_rec;
2512         unsigned long name_ptr;
2513         u32 data_size = sizeof(*dir_item) + backref->namelen;
2514         int ret;
2515
2516         trans = btrfs_start_transaction(root, 1);
2517         if (IS_ERR(trans))
2518                 return PTR_ERR(trans);
2519
2520         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2521                 (unsigned long long)rec->ino);
2522
2523         btrfs_init_path(&path);
2524         key.objectid = backref->dir;
2525         key.type = BTRFS_DIR_INDEX_KEY;
2526         key.offset = backref->index;
2527         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2528         BUG_ON(ret);
2529
2530         leaf = path.nodes[0];
2531         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2532
2533         disk_key.objectid = cpu_to_le64(rec->ino);
2534         disk_key.type = BTRFS_INODE_ITEM_KEY;
2535         disk_key.offset = 0;
2536
2537         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2538         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2539         btrfs_set_dir_data_len(leaf, dir_item, 0);
2540         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2541         name_ptr = (unsigned long)(dir_item + 1);
2542         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2543         btrfs_mark_buffer_dirty(leaf);
2544         btrfs_release_path(&path);
2545         btrfs_commit_transaction(trans, root);
2546
2547         backref->found_dir_index = 1;
2548         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2549         BUG_ON(IS_ERR(dir_rec));
2550         if (!dir_rec)
2551                 return 0;
2552         dir_rec->found_size += backref->namelen;
2553         if (dir_rec->found_size == dir_rec->isize &&
2554             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2555                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2556         if (dir_rec->found_size != dir_rec->isize)
2557                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2558
2559         return 0;
2560 }
2561
2562 static int delete_dir_index(struct btrfs_root *root,
2563                             struct inode_backref *backref)
2564 {
2565         struct btrfs_trans_handle *trans;
2566         struct btrfs_dir_item *di;
2567         struct btrfs_path path;
2568         int ret = 0;
2569
2570         trans = btrfs_start_transaction(root, 1);
2571         if (IS_ERR(trans))
2572                 return PTR_ERR(trans);
2573
2574         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2575                 (unsigned long long)backref->dir,
2576                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2577                 (unsigned long long)root->objectid);
2578
2579         btrfs_init_path(&path);
2580         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2581                                     backref->name, backref->namelen,
2582                                     backref->index, -1);
2583         if (IS_ERR(di)) {
2584                 ret = PTR_ERR(di);
2585                 btrfs_release_path(&path);
2586                 btrfs_commit_transaction(trans, root);
2587                 if (ret == -ENOENT)
2588                         return 0;
2589                 return ret;
2590         }
2591
2592         if (!di)
2593                 ret = btrfs_del_item(trans, root, &path);
2594         else
2595                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2596         BUG_ON(ret);
2597         btrfs_release_path(&path);
2598         btrfs_commit_transaction(trans, root);
2599         return ret;
2600 }
2601
2602 static int create_inode_item(struct btrfs_root *root,
2603                              struct inode_record *rec,
2604                              int root_dir)
2605 {
2606         struct btrfs_trans_handle *trans;
2607         struct btrfs_inode_item inode_item;
2608         time_t now = time(NULL);
2609         int ret;
2610
2611         trans = btrfs_start_transaction(root, 1);
2612         if (IS_ERR(trans)) {
2613                 ret = PTR_ERR(trans);
2614                 return ret;
2615         }
2616
2617         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2618                 "be incomplete, please check permissions and content after "
2619                 "the fsck completes.\n", (unsigned long long)root->objectid,
2620                 (unsigned long long)rec->ino);
2621
2622         memset(&inode_item, 0, sizeof(inode_item));
2623         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2624         if (root_dir)
2625                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2626         else
2627                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2628         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2629         if (rec->found_dir_item) {
2630                 if (rec->found_file_extent)
2631                         fprintf(stderr, "root %llu inode %llu has both a dir "
2632                                 "item and extents, unsure if it is a dir or a "
2633                                 "regular file so setting it as a directory\n",
2634                                 (unsigned long long)root->objectid,
2635                                 (unsigned long long)rec->ino);
2636                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2637                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2638         } else if (!rec->found_dir_item) {
2639                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2640                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2641         }
2642         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2643         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2644         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2645         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2646         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2647         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2648         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2649         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2650
2651         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2652         BUG_ON(ret);
2653         btrfs_commit_transaction(trans, root);
2654         return 0;
2655 }
2656
2657 static int repair_inode_backrefs(struct btrfs_root *root,
2658                                  struct inode_record *rec,
2659                                  struct cache_tree *inode_cache,
2660                                  int delete)
2661 {
2662         struct inode_backref *tmp, *backref;
2663         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2664         int ret = 0;
2665         int repaired = 0;
2666
2667         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2668                 if (!delete && rec->ino == root_dirid) {
2669                         if (!rec->found_inode_item) {
2670                                 ret = create_inode_item(root, rec, 1);
2671                                 if (ret)
2672                                         break;
2673                                 repaired++;
2674                         }
2675                 }
2676
2677                 /* Index 0 for root dir's are special, don't mess with it */
2678                 if (rec->ino == root_dirid && backref->index == 0)
2679                         continue;
2680
2681                 if (delete &&
2682                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2683                      (backref->found_dir_index && backref->found_inode_ref &&
2684                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2685                         ret = delete_dir_index(root, backref);
2686                         if (ret)
2687                                 break;
2688                         repaired++;
2689                         list_del(&backref->list);
2690                         free(backref);
2691                         continue;
2692                 }
2693
2694                 if (!delete && !backref->found_dir_index &&
2695                     backref->found_dir_item && backref->found_inode_ref) {
2696                         ret = add_missing_dir_index(root, inode_cache, rec,
2697                                                     backref);
2698                         if (ret)
2699                                 break;
2700                         repaired++;
2701                         if (backref->found_dir_item &&
2702                             backref->found_dir_index) {
2703                                 if (!backref->errors &&
2704                                     backref->found_inode_ref) {
2705                                         list_del(&backref->list);
2706                                         free(backref);
2707                                         continue;
2708                                 }
2709                         }
2710                 }
2711
2712                 if (!delete && (!backref->found_dir_index &&
2713                                 !backref->found_dir_item &&
2714                                 backref->found_inode_ref)) {
2715                         struct btrfs_trans_handle *trans;
2716                         struct btrfs_key location;
2717
2718                         ret = check_dir_conflict(root, backref->name,
2719                                                  backref->namelen,
2720                                                  backref->dir,
2721                                                  backref->index);
2722                         if (ret) {
2723                                 /*
2724                                  * let nlink fixing routine to handle it,
2725                                  * which can do it better.
2726                                  */
2727                                 ret = 0;
2728                                 break;
2729                         }
2730                         location.objectid = rec->ino;
2731                         location.type = BTRFS_INODE_ITEM_KEY;
2732                         location.offset = 0;
2733
2734                         trans = btrfs_start_transaction(root, 1);
2735                         if (IS_ERR(trans)) {
2736                                 ret = PTR_ERR(trans);
2737                                 break;
2738                         }
2739                         fprintf(stderr, "adding missing dir index/item pair "
2740                                 "for inode %llu\n",
2741                                 (unsigned long long)rec->ino);
2742                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2743                                                     backref->namelen,
2744                                                     backref->dir, &location,
2745                                                     imode_to_type(rec->imode),
2746                                                     backref->index);
2747                         BUG_ON(ret);
2748                         btrfs_commit_transaction(trans, root);
2749                         repaired++;
2750                 }
2751
2752                 if (!delete && (backref->found_inode_ref &&
2753                                 backref->found_dir_index &&
2754                                 backref->found_dir_item &&
2755                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2756                                 !rec->found_inode_item)) {
2757                         ret = create_inode_item(root, rec, 0);
2758                         if (ret)
2759                                 break;
2760                         repaired++;
2761                 }
2762
2763         }
2764         return ret ? ret : repaired;
2765 }
2766
2767 /*
2768  * To determine the file type for nlink/inode_item repair
2769  *
2770  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2771  * Return -ENOENT if file type is not found.
2772  */
2773 static int find_file_type(struct inode_record *rec, u8 *type)
2774 {
2775         struct inode_backref *backref;
2776
2777         /* For inode item recovered case */
2778         if (rec->found_inode_item) {
2779                 *type = imode_to_type(rec->imode);
2780                 return 0;
2781         }
2782
2783         list_for_each_entry(backref, &rec->backrefs, list) {
2784                 if (backref->found_dir_index || backref->found_dir_item) {
2785                         *type = backref->filetype;
2786                         return 0;
2787                 }
2788         }
2789         return -ENOENT;
2790 }
2791
2792 /*
2793  * To determine the file name for nlink repair
2794  *
2795  * Return 0 if file name is found, set name and namelen.
2796  * Return -ENOENT if file name is not found.
2797  */
2798 static int find_file_name(struct inode_record *rec,
2799                           char *name, int *namelen)
2800 {
2801         struct inode_backref *backref;
2802
2803         list_for_each_entry(backref, &rec->backrefs, list) {
2804                 if (backref->found_dir_index || backref->found_dir_item ||
2805                     backref->found_inode_ref) {
2806                         memcpy(name, backref->name, backref->namelen);
2807                         *namelen = backref->namelen;
2808                         return 0;
2809                 }
2810         }
2811         return -ENOENT;
2812 }
2813
2814 /* Reset the nlink of the inode to the correct one */
2815 static int reset_nlink(struct btrfs_trans_handle *trans,
2816                        struct btrfs_root *root,
2817                        struct btrfs_path *path,
2818                        struct inode_record *rec)
2819 {
2820         struct inode_backref *backref;
2821         struct inode_backref *tmp;
2822         struct btrfs_key key;
2823         struct btrfs_inode_item *inode_item;
2824         int ret = 0;
2825
2826         /* We don't believe this either, reset it and iterate backref */
2827         rec->found_link = 0;
2828
2829         /* Remove all backref including the valid ones */
2830         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2831                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2832                                    backref->index, backref->name,
2833                                    backref->namelen, 0);
2834                 if (ret < 0)
2835                         goto out;
2836
2837                 /* remove invalid backref, so it won't be added back */
2838                 if (!(backref->found_dir_index &&
2839                       backref->found_dir_item &&
2840                       backref->found_inode_ref)) {
2841                         list_del(&backref->list);
2842                         free(backref);
2843                 } else {
2844                         rec->found_link++;
2845                 }
2846         }
2847
2848         /* Set nlink to 0 */
2849         key.objectid = rec->ino;
2850         key.type = BTRFS_INODE_ITEM_KEY;
2851         key.offset = 0;
2852         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2853         if (ret < 0)
2854                 goto out;
2855         if (ret > 0) {
2856                 ret = -ENOENT;
2857                 goto out;
2858         }
2859         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2860                                     struct btrfs_inode_item);
2861         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2862         btrfs_mark_buffer_dirty(path->nodes[0]);
2863         btrfs_release_path(path);
2864
2865         /*
2866          * Add back valid inode_ref/dir_item/dir_index,
2867          * add_link() will handle the nlink inc, so new nlink must be correct
2868          */
2869         list_for_each_entry(backref, &rec->backrefs, list) {
2870                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2871                                      backref->name, backref->namelen,
2872                                      backref->filetype, &backref->index, 1);
2873                 if (ret < 0)
2874                         goto out;
2875         }
2876 out:
2877         btrfs_release_path(path);
2878         return ret;
2879 }
2880
2881 static int get_highest_inode(struct btrfs_trans_handle *trans,
2882                                 struct btrfs_root *root,
2883                                 struct btrfs_path *path,
2884                                 u64 *highest_ino)
2885 {
2886         struct btrfs_key key, found_key;
2887         int ret;
2888
2889         btrfs_init_path(path);
2890         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2891         key.offset = -1;
2892         key.type = BTRFS_INODE_ITEM_KEY;
2893         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2894         if (ret == 1) {
2895                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2896                                 path->slots[0] - 1);
2897                 *highest_ino = found_key.objectid;
2898                 ret = 0;
2899         }
2900         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2901                 ret = -EOVERFLOW;
2902         btrfs_release_path(path);
2903         return ret;
2904 }
2905
2906 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2907                                struct btrfs_root *root,
2908                                struct btrfs_path *path,
2909                                struct inode_record *rec)
2910 {
2911         char *dir_name = "lost+found";
2912         char namebuf[BTRFS_NAME_LEN] = {0};
2913         u64 lost_found_ino;
2914         u32 mode = 0700;
2915         u8 type = 0;
2916         int namelen = 0;
2917         int name_recovered = 0;
2918         int type_recovered = 0;
2919         int ret = 0;
2920
2921         /*
2922          * Get file name and type first before these invalid inode ref
2923          * are deleted by remove_all_invalid_backref()
2924          */
2925         name_recovered = !find_file_name(rec, namebuf, &namelen);
2926         type_recovered = !find_file_type(rec, &type);
2927
2928         if (!name_recovered) {
2929                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2930                        rec->ino, rec->ino);
2931                 namelen = count_digits(rec->ino);
2932                 sprintf(namebuf, "%llu", rec->ino);
2933                 name_recovered = 1;
2934         }
2935         if (!type_recovered) {
2936                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2937                        rec->ino);
2938                 type = BTRFS_FT_REG_FILE;
2939                 type_recovered = 1;
2940         }
2941
2942         ret = reset_nlink(trans, root, path, rec);
2943         if (ret < 0) {
2944                 fprintf(stderr,
2945                         "Failed to reset nlink for inode %llu: %s\n",
2946                         rec->ino, strerror(-ret));
2947                 goto out;
2948         }
2949
2950         if (rec->found_link == 0) {
2951                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2952                 if (ret < 0)
2953                         goto out;
2954                 lost_found_ino++;
2955                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2956                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2957                                   mode);
2958                 if (ret < 0) {
2959                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2960                                 dir_name, strerror(-ret));
2961                         goto out;
2962                 }
2963                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2964                                      namebuf, namelen, type, NULL, 1);
2965                 /*
2966                  * Add ".INO" suffix several times to handle case where
2967                  * "FILENAME.INO" is already taken by another file.
2968                  */
2969                 while (ret == -EEXIST) {
2970                         /*
2971                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2972                          */
2973                         if (namelen + count_digits(rec->ino) + 1 >
2974                             BTRFS_NAME_LEN) {
2975                                 ret = -EFBIG;
2976                                 goto out;
2977                         }
2978                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2979                                  ".%llu", rec->ino);
2980                         namelen += count_digits(rec->ino) + 1;
2981                         ret = btrfs_add_link(trans, root, rec->ino,
2982                                              lost_found_ino, namebuf,
2983                                              namelen, type, NULL, 1);
2984                 }
2985                 if (ret < 0) {
2986                         fprintf(stderr,
2987                                 "Failed to link the inode %llu to %s dir: %s\n",
2988                                 rec->ino, dir_name, strerror(-ret));
2989                         goto out;
2990                 }
2991                 /*
2992                  * Just increase the found_link, don't actually add the
2993                  * backref. This will make things easier and this inode
2994                  * record will be freed after the repair is done.
2995                  * So fsck will not report problem about this inode.
2996                  */
2997                 rec->found_link++;
2998                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2999                        namelen, namebuf, dir_name);
3000         }
3001         printf("Fixed the nlink of inode %llu\n", rec->ino);
3002 out:
3003         /*
3004          * Clear the flag anyway, or we will loop forever for the same inode
3005          * as it will not be removed from the bad inode list and the dead loop
3006          * happens.
3007          */
3008         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3009         btrfs_release_path(path);
3010         return ret;
3011 }
3012
3013 /*
3014  * Check if there is any normal(reg or prealloc) file extent for given
3015  * ino.
3016  * This is used to determine the file type when neither its dir_index/item or
3017  * inode_item exists.
3018  *
3019  * This will *NOT* report error, if any error happens, just consider it does
3020  * not have any normal file extent.
3021  */
3022 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3023 {
3024         struct btrfs_path path;
3025         struct btrfs_key key;
3026         struct btrfs_key found_key;
3027         struct btrfs_file_extent_item *fi;
3028         u8 type;
3029         int ret = 0;
3030
3031         btrfs_init_path(&path);
3032         key.objectid = ino;
3033         key.type = BTRFS_EXTENT_DATA_KEY;
3034         key.offset = 0;
3035
3036         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3037         if (ret < 0) {
3038                 ret = 0;
3039                 goto out;
3040         }
3041         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3042                 ret = btrfs_next_leaf(root, &path);
3043                 if (ret) {
3044                         ret = 0;
3045                         goto out;
3046                 }
3047         }
3048         while (1) {
3049                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3050                                       path.slots[0]);
3051                 if (found_key.objectid != ino ||
3052                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3053                         break;
3054                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3055                                     struct btrfs_file_extent_item);
3056                 type = btrfs_file_extent_type(path.nodes[0], fi);
3057                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3058                         ret = 1;
3059                         goto out;
3060                 }
3061         }
3062 out:
3063         btrfs_release_path(&path);
3064         return ret;
3065 }
3066
3067 static u32 btrfs_type_to_imode(u8 type)
3068 {
3069         static u32 imode_by_btrfs_type[] = {
3070                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3071                 [BTRFS_FT_DIR]          = S_IFDIR,
3072                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3073                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3074                 [BTRFS_FT_FIFO]         = S_IFIFO,
3075                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3076                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3077         };
3078
3079         return imode_by_btrfs_type[(type)];
3080 }
3081
3082 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3083                                 struct btrfs_root *root,
3084                                 struct btrfs_path *path,
3085                                 struct inode_record *rec)
3086 {
3087         u8 filetype;
3088         u32 mode = 0700;
3089         int type_recovered = 0;
3090         int ret = 0;
3091
3092         printf("Trying to rebuild inode:%llu\n", rec->ino);
3093
3094         type_recovered = !find_file_type(rec, &filetype);
3095
3096         /*
3097          * Try to determine inode type if type not found.
3098          *
3099          * For found regular file extent, it must be FILE.
3100          * For found dir_item/index, it must be DIR.
3101          *
3102          * For undetermined one, use FILE as fallback.
3103          *
3104          * TODO:
3105          * 1. If found backref(inode_index/item is already handled) to it,
3106          *    it must be DIR.
3107          *    Need new inode-inode ref structure to allow search for that.
3108          */
3109         if (!type_recovered) {
3110                 if (rec->found_file_extent &&
3111                     find_normal_file_extent(root, rec->ino)) {
3112                         type_recovered = 1;
3113                         filetype = BTRFS_FT_REG_FILE;
3114                 } else if (rec->found_dir_item) {
3115                         type_recovered = 1;
3116                         filetype = BTRFS_FT_DIR;
3117                 } else if (!list_empty(&rec->orphan_extents)) {
3118                         type_recovered = 1;
3119                         filetype = BTRFS_FT_REG_FILE;
3120                 } else{
3121                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3122                                rec->ino);
3123                         type_recovered = 1;
3124                         filetype = BTRFS_FT_REG_FILE;
3125                 }
3126         }
3127
3128         ret = btrfs_new_inode(trans, root, rec->ino,
3129                               mode | btrfs_type_to_imode(filetype));
3130         if (ret < 0)
3131                 goto out;
3132
3133         /*
3134          * Here inode rebuild is done, we only rebuild the inode item,
3135          * don't repair the nlink(like move to lost+found).
3136          * That is the job of nlink repair.
3137          *
3138          * We just fill the record and return
3139          */
3140         rec->found_dir_item = 1;
3141         rec->imode = mode | btrfs_type_to_imode(filetype);
3142         rec->nlink = 0;
3143         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3144         /* Ensure the inode_nlinks repair function will be called */
3145         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3146 out:
3147         return ret;
3148 }
3149
3150 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3151                                       struct btrfs_root *root,
3152                                       struct btrfs_path *path,
3153                                       struct inode_record *rec)
3154 {
3155         struct orphan_data_extent *orphan;
3156         struct orphan_data_extent *tmp;
3157         int ret = 0;
3158
3159         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3160                 /*
3161                  * Check for conflicting file extents
3162                  *
3163                  * Here we don't know whether the extents is compressed or not,
3164                  * so we can only assume it not compressed nor data offset,
3165                  * and use its disk_len as extent length.
3166                  */
3167                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3168                                        orphan->offset, orphan->disk_len, 0);
3169                 btrfs_release_path(path);
3170                 if (ret < 0)
3171                         goto out;
3172                 if (!ret) {
3173                         fprintf(stderr,
3174                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3175                                 orphan->disk_bytenr, orphan->disk_len);
3176                         ret = btrfs_free_extent(trans,
3177                                         root->fs_info->extent_root,
3178                                         orphan->disk_bytenr, orphan->disk_len,
3179                                         0, root->objectid, orphan->objectid,
3180                                         orphan->offset);
3181                         if (ret < 0)
3182                                 goto out;
3183                 }
3184                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3185                                 orphan->offset, orphan->disk_bytenr,
3186                                 orphan->disk_len, orphan->disk_len);
3187                 if (ret < 0)
3188                         goto out;
3189
3190                 /* Update file size info */
3191                 rec->found_size += orphan->disk_len;
3192                 if (rec->found_size == rec->nbytes)
3193                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3194
3195                 /* Update the file extent hole info too */
3196                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3197                                            orphan->disk_len);
3198                 if (ret < 0)
3199                         goto out;
3200                 if (RB_EMPTY_ROOT(&rec->holes))
3201                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3202
3203                 list_del(&orphan->list);
3204                 free(orphan);
3205         }
3206         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3207 out:
3208         return ret;
3209 }
3210
3211 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3212                                         struct btrfs_root *root,
3213                                         struct btrfs_path *path,
3214                                         struct inode_record *rec)
3215 {
3216         struct rb_node *node;
3217         struct file_extent_hole *hole;
3218         int found = 0;
3219         int ret = 0;
3220
3221         node = rb_first(&rec->holes);
3222
3223         while (node) {
3224                 found = 1;
3225                 hole = rb_entry(node, struct file_extent_hole, node);
3226                 ret = btrfs_punch_hole(trans, root, rec->ino,
3227                                        hole->start, hole->len);
3228                 if (ret < 0)
3229                         goto out;
3230                 ret = del_file_extent_hole(&rec->holes, hole->start,
3231                                            hole->len);
3232                 if (ret < 0)
3233                         goto out;
3234                 if (RB_EMPTY_ROOT(&rec->holes))
3235                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3236                 node = rb_first(&rec->holes);
3237         }
3238         /* special case for a file losing all its file extent */
3239         if (!found) {
3240                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3241                                        round_up(rec->isize,
3242                                                 root->fs_info->sectorsize));
3243                 if (ret < 0)
3244                         goto out;
3245         }
3246         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3247                rec->ino, root->objectid);
3248 out:
3249         return ret;
3250 }
3251
3252 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3253 {
3254         struct btrfs_trans_handle *trans;
3255         struct btrfs_path path;
3256         int ret = 0;
3257
3258         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3259                              I_ERR_NO_ORPHAN_ITEM |
3260                              I_ERR_LINK_COUNT_WRONG |
3261                              I_ERR_NO_INODE_ITEM |
3262                              I_ERR_FILE_EXTENT_ORPHAN |
3263                              I_ERR_FILE_EXTENT_DISCOUNT|
3264                              I_ERR_FILE_NBYTES_WRONG)))
3265                 return rec->errors;
3266
3267         /*
3268          * For nlink repair, it may create a dir and add link, so
3269          * 2 for parent(256)'s dir_index and dir_item
3270          * 2 for lost+found dir's inode_item and inode_ref
3271          * 1 for the new inode_ref of the file
3272          * 2 for lost+found dir's dir_index and dir_item for the file
3273          */
3274         trans = btrfs_start_transaction(root, 7);
3275         if (IS_ERR(trans))
3276                 return PTR_ERR(trans);
3277
3278         btrfs_init_path(&path);
3279         if (rec->errors & I_ERR_NO_INODE_ITEM)
3280                 ret = repair_inode_no_item(trans, root, &path, rec);
3281         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3282                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3283         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3284                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3285         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3286                 ret = repair_inode_isize(trans, root, &path, rec);
3287         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3288                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3289         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3290                 ret = repair_inode_nlinks(trans, root, &path, rec);
3291         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3292                 ret = repair_inode_nbytes(trans, root, &path, rec);
3293         btrfs_commit_transaction(trans, root);
3294         btrfs_release_path(&path);
3295         return ret;
3296 }
3297
3298 static int check_inode_recs(struct btrfs_root *root,
3299                             struct cache_tree *inode_cache)
3300 {
3301         struct cache_extent *cache;
3302         struct ptr_node *node;
3303         struct inode_record *rec;
3304         struct inode_backref *backref;
3305         int stage = 0;
3306         int ret = 0;
3307         int err = 0;
3308         u64 error = 0;
3309         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3310
3311         if (btrfs_root_refs(&root->root_item) == 0) {
3312                 if (!cache_tree_empty(inode_cache))
3313                         fprintf(stderr, "warning line %d\n", __LINE__);
3314                 return 0;
3315         }
3316
3317         /*
3318          * We need to repair backrefs first because we could change some of the
3319          * errors in the inode recs.
3320          *
3321          * We also need to go through and delete invalid backrefs first and then
3322          * add the correct ones second.  We do this because we may get EEXIST
3323          * when adding back the correct index because we hadn't yet deleted the
3324          * invalid index.
3325          *
3326          * For example, if we were missing a dir index then the directories
3327          * isize would be wrong, so if we fixed the isize to what we thought it
3328          * would be and then fixed the backref we'd still have a invalid fs, so
3329          * we need to add back the dir index and then check to see if the isize
3330          * is still wrong.
3331          */
3332         while (stage < 3) {
3333                 stage++;
3334                 if (stage == 3 && !err)
3335                         break;
3336
3337                 cache = search_cache_extent(inode_cache, 0);
3338                 while (repair && cache) {
3339                         node = container_of(cache, struct ptr_node, cache);
3340                         rec = node->data;
3341                         cache = next_cache_extent(cache);
3342
3343                         /* Need to free everything up and rescan */
3344                         if (stage == 3) {
3345                                 remove_cache_extent(inode_cache, &node->cache);
3346                                 free(node);
3347                                 free_inode_rec(rec);
3348                                 continue;
3349                         }
3350
3351                         if (list_empty(&rec->backrefs))
3352                                 continue;
3353
3354                         ret = repair_inode_backrefs(root, rec, inode_cache,
3355                                                     stage == 1);
3356                         if (ret < 0) {
3357                                 err = ret;
3358                                 stage = 2;
3359                                 break;
3360                         } if (ret > 0) {
3361                                 err = -EAGAIN;
3362                         }
3363                 }
3364         }
3365         if (err)
3366                 return err;
3367
3368         rec = get_inode_rec(inode_cache, root_dirid, 0);
3369         BUG_ON(IS_ERR(rec));
3370         if (rec) {
3371                 ret = check_root_dir(rec);
3372                 if (ret) {
3373                         fprintf(stderr, "root %llu root dir %llu error\n",
3374                                 (unsigned long long)root->root_key.objectid,
3375                                 (unsigned long long)root_dirid);
3376                         print_inode_error(root, rec);
3377                         error++;
3378                 }
3379         } else {
3380                 if (repair) {
3381                         struct btrfs_trans_handle *trans;
3382
3383                         trans = btrfs_start_transaction(root, 1);
3384                         if (IS_ERR(trans)) {
3385                                 err = PTR_ERR(trans);
3386                                 return err;
3387                         }
3388
3389                         fprintf(stderr,
3390                                 "root %llu missing its root dir, recreating\n",
3391                                 (unsigned long long)root->objectid);
3392
3393                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3394                         BUG_ON(ret);
3395
3396                         btrfs_commit_transaction(trans, root);
3397                         return -EAGAIN;
3398                 }
3399
3400                 fprintf(stderr, "root %llu root dir %llu not found\n",
3401                         (unsigned long long)root->root_key.objectid,
3402                         (unsigned long long)root_dirid);
3403         }
3404
3405         while (1) {
3406                 cache = search_cache_extent(inode_cache, 0);
3407                 if (!cache)
3408                         break;
3409                 node = container_of(cache, struct ptr_node, cache);
3410                 rec = node->data;
3411                 remove_cache_extent(inode_cache, &node->cache);
3412                 free(node);
3413                 if (rec->ino == root_dirid ||
3414                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3415                         free_inode_rec(rec);
3416                         continue;
3417                 }
3418
3419                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3420                         ret = check_orphan_item(root, rec->ino);
3421                         if (ret == 0)
3422                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3423                         if (can_free_inode_rec(rec)) {
3424                                 free_inode_rec(rec);
3425                                 continue;
3426                         }
3427                 }
3428
3429                 if (!rec->found_inode_item)
3430                         rec->errors |= I_ERR_NO_INODE_ITEM;
3431                 if (rec->found_link != rec->nlink)
3432                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3433                 if (repair) {
3434                         ret = try_repair_inode(root, rec);
3435                         if (ret == 0 && can_free_inode_rec(rec)) {
3436                                 free_inode_rec(rec);
3437                                 continue;
3438                         }
3439                         ret = 0;
3440                 }
3441
3442                 if (!(repair && ret == 0))
3443                         error++;
3444                 print_inode_error(root, rec);
3445                 list_for_each_entry(backref, &rec->backrefs, list) {
3446                         if (!backref->found_dir_item)
3447                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3448                         if (!backref->found_dir_index)
3449                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3450                         if (!backref->found_inode_ref)
3451                                 backref->errors |= REF_ERR_NO_INODE_REF;
3452                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3453                                 " namelen %u name %s filetype %d errors %x",
3454                                 (unsigned long long)backref->dir,
3455                                 (unsigned long long)backref->index,
3456                                 backref->namelen, backref->name,
3457                                 backref->filetype, backref->errors);
3458                         print_ref_error(backref->errors);
3459                 }
3460                 free_inode_rec(rec);
3461         }
3462         return (error > 0) ? -1 : 0;
3463 }
3464
3465 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3466                                         u64 objectid)
3467 {
3468         struct cache_extent *cache;
3469         struct root_record *rec = NULL;
3470         int ret;
3471
3472         cache = lookup_cache_extent(root_cache, objectid, 1);
3473         if (cache) {
3474                 rec = container_of(cache, struct root_record, cache);
3475         } else {
3476                 rec = calloc(1, sizeof(*rec));
3477                 if (!rec)
3478                         return ERR_PTR(-ENOMEM);
3479                 rec->objectid = objectid;
3480                 INIT_LIST_HEAD(&rec->backrefs);
3481                 rec->cache.start = objectid;
3482                 rec->cache.size = 1;
3483
3484                 ret = insert_cache_extent(root_cache, &rec->cache);
3485                 if (ret)
3486                         return ERR_PTR(-EEXIST);
3487         }
3488         return rec;
3489 }
3490
3491 static struct root_backref *get_root_backref(struct root_record *rec,
3492                                              u64 ref_root, u64 dir, u64 index,
3493                                              const char *name, int namelen)
3494 {
3495         struct root_backref *backref;
3496
3497         list_for_each_entry(backref, &rec->backrefs, list) {
3498                 if (backref->ref_root != ref_root || backref->dir != dir ||
3499                     backref->namelen != namelen)
3500                         continue;
3501                 if (memcmp(name, backref->name, namelen))
3502                         continue;
3503                 return backref;
3504         }
3505
3506         backref = calloc(1, sizeof(*backref) + namelen + 1);
3507         if (!backref)
3508                 return NULL;
3509         backref->ref_root = ref_root;
3510         backref->dir = dir;
3511         backref->index = index;
3512         backref->namelen = namelen;
3513         memcpy(backref->name, name, namelen);
3514         backref->name[namelen] = '\0';
3515         list_add_tail(&backref->list, &rec->backrefs);
3516         return backref;
3517 }
3518
3519 static void free_root_record(struct cache_extent *cache)
3520 {
3521         struct root_record *rec;
3522         struct root_backref *backref;
3523
3524         rec = container_of(cache, struct root_record, cache);
3525         while (!list_empty(&rec->backrefs)) {
3526                 backref = to_root_backref(rec->backrefs.next);
3527                 list_del(&backref->list);
3528                 free(backref);
3529         }
3530
3531         free(rec);
3532 }
3533
3534 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3535
3536 static int add_root_backref(struct cache_tree *root_cache,
3537                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3538                             const char *name, int namelen,
3539                             int item_type, int errors)
3540 {
3541         struct root_record *rec;
3542         struct root_backref *backref;
3543
3544         rec = get_root_rec(root_cache, root_id);
3545         BUG_ON(IS_ERR(rec));
3546         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3547         BUG_ON(!backref);
3548
3549         backref->errors |= errors;
3550
3551         if (item_type != BTRFS_DIR_ITEM_KEY) {
3552                 if (backref->found_dir_index || backref->found_back_ref ||
3553                     backref->found_forward_ref) {
3554                         if (backref->index != index)
3555                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3556                 } else {
3557                         backref->index = index;
3558                 }
3559         }
3560
3561         if (item_type == BTRFS_DIR_ITEM_KEY) {
3562                 if (backref->found_forward_ref)
3563                         rec->found_ref++;
3564                 backref->found_dir_item = 1;
3565         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3566                 backref->found_dir_index = 1;
3567         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3568                 if (backref->found_forward_ref)
3569                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3570                 else if (backref->found_dir_item)
3571                         rec->found_ref++;
3572                 backref->found_forward_ref = 1;
3573         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3574                 if (backref->found_back_ref)
3575                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3576                 backref->found_back_ref = 1;
3577         } else {
3578                 BUG_ON(1);
3579         }
3580
3581         if (backref->found_forward_ref && backref->found_dir_item)
3582                 backref->reachable = 1;
3583         return 0;
3584 }
3585
3586 static int merge_root_recs(struct btrfs_root *root,
3587                            struct cache_tree *src_cache,
3588                            struct cache_tree *dst_cache)
3589 {
3590         struct cache_extent *cache;
3591         struct ptr_node *node;
3592         struct inode_record *rec;
3593         struct inode_backref *backref;
3594         int ret = 0;
3595
3596         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3597                 free_inode_recs_tree(src_cache);
3598                 return 0;
3599         }
3600
3601         while (1) {
3602                 cache = search_cache_extent(src_cache, 0);
3603                 if (!cache)
3604                         break;
3605                 node = container_of(cache, struct ptr_node, cache);
3606                 rec = node->data;
3607                 remove_cache_extent(src_cache, &node->cache);
3608                 free(node);
3609
3610                 ret = is_child_root(root, root->objectid, rec->ino);
3611                 if (ret < 0)
3612                         break;
3613                 else if (ret == 0)
3614                         goto skip;
3615
3616                 list_for_each_entry(backref, &rec->backrefs, list) {
3617                         BUG_ON(backref->found_inode_ref);
3618                         if (backref->found_dir_item)
3619                                 add_root_backref(dst_cache, rec->ino,
3620                                         root->root_key.objectid, backref->dir,
3621                                         backref->index, backref->name,
3622                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3623                                         backref->errors);
3624                         if (backref->found_dir_index)
3625                                 add_root_backref(dst_cache, rec->ino,
3626                                         root->root_key.objectid, backref->dir,
3627                                         backref->index, backref->name,
3628                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3629                                         backref->errors);
3630                 }
3631 skip:
3632                 free_inode_rec(rec);
3633         }
3634         if (ret < 0)
3635                 return ret;
3636         return 0;
3637 }
3638
3639 static int check_root_refs(struct btrfs_root *root,
3640                            struct cache_tree *root_cache)
3641 {
3642         struct root_record *rec;
3643         struct root_record *ref_root;
3644         struct root_backref *backref;
3645         struct cache_extent *cache;
3646         int loop = 1;
3647         int ret;
3648         int error;
3649         int errors = 0;
3650
3651         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3652         BUG_ON(IS_ERR(rec));
3653         rec->found_ref = 1;
3654
3655         /* fixme: this can not detect circular references */
3656         while (loop) {
3657                 loop = 0;
3658                 cache = search_cache_extent(root_cache, 0);
3659                 while (1) {
3660                         if (!cache)
3661                                 break;
3662                         rec = container_of(cache, struct root_record, cache);
3663                         cache = next_cache_extent(cache);
3664
3665                         if (rec->found_ref == 0)
3666                                 continue;
3667
3668                         list_for_each_entry(backref, &rec->backrefs, list) {
3669                                 if (!backref->reachable)
3670                                         continue;
3671
3672                                 ref_root = get_root_rec(root_cache,
3673                                                         backref->ref_root);
3674                                 BUG_ON(IS_ERR(ref_root));
3675                                 if (ref_root->found_ref > 0)
3676                                         continue;
3677
3678                                 backref->reachable = 0;
3679                                 rec->found_ref--;
3680                                 if (rec->found_ref == 0)
3681                                         loop = 1;
3682                         }
3683                 }
3684         }
3685
3686         cache = search_cache_extent(root_cache, 0);
3687         while (1) {
3688                 if (!cache)
3689                         break;
3690                 rec = container_of(cache, struct root_record, cache);
3691                 cache = next_cache_extent(cache);
3692
3693                 if (rec->found_ref == 0 &&
3694                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3695                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3696                         ret = check_orphan_item(root->fs_info->tree_root,
3697                                                 rec->objectid);
3698                         if (ret == 0)
3699                                 continue;
3700
3701                         /*
3702                          * If we don't have a root item then we likely just have
3703                          * a dir item in a snapshot for this root but no actual
3704                          * ref key or anything so it's meaningless.
3705                          */
3706                         if (!rec->found_root_item)
3707                                 continue;
3708                         errors++;
3709                         fprintf(stderr, "fs tree %llu not referenced\n",
3710                                 (unsigned long long)rec->objectid);
3711                 }
3712
3713                 error = 0;
3714                 if (rec->found_ref > 0 && !rec->found_root_item)
3715                         error = 1;
3716                 list_for_each_entry(backref, &rec->backrefs, list) {
3717                         if (!backref->found_dir_item)
3718                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3719                         if (!backref->found_dir_index)
3720                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3721                         if (!backref->found_back_ref)
3722                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3723                         if (!backref->found_forward_ref)
3724                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3725                         if (backref->reachable && backref->errors)
3726                                 error = 1;
3727                 }
3728                 if (!error)
3729                         continue;
3730
3731                 errors++;
3732                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3733                         (unsigned long long)rec->objectid, rec->found_ref,
3734                          rec->found_root_item ? "" : "not found");
3735
3736                 list_for_each_entry(backref, &rec->backrefs, list) {
3737                         if (!backref->reachable)
3738                                 continue;
3739                         if (!backref->errors && rec->found_root_item)
3740                                 continue;
3741                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3742                                 " index %llu namelen %u name %s errors %x\n",
3743                                 (unsigned long long)backref->ref_root,
3744                                 (unsigned long long)backref->dir,
3745                                 (unsigned long long)backref->index,
3746                                 backref->namelen, backref->name,
3747                                 backref->errors);
3748                         print_ref_error(backref->errors);
3749                 }
3750         }
3751         return errors > 0 ? 1 : 0;
3752 }
3753
3754 static int process_root_ref(struct extent_buffer *eb, int slot,
3755                             struct btrfs_key *key,
3756                             struct cache_tree *root_cache)
3757 {
3758         u64 dirid;
3759         u64 index;
3760         u32 len;
3761         u32 name_len;
3762         struct btrfs_root_ref *ref;
3763         char namebuf[BTRFS_NAME_LEN];
3764         int error;
3765
3766         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3767
3768         dirid = btrfs_root_ref_dirid(eb, ref);
3769         index = btrfs_root_ref_sequence(eb, ref);
3770         name_len = btrfs_root_ref_name_len(eb, ref);
3771
3772         if (name_len <= BTRFS_NAME_LEN) {
3773                 len = name_len;
3774                 error = 0;
3775         } else {
3776                 len = BTRFS_NAME_LEN;
3777                 error = REF_ERR_NAME_TOO_LONG;
3778         }
3779         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3780
3781         if (key->type == BTRFS_ROOT_REF_KEY) {
3782                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3783                                  index, namebuf, len, key->type, error);
3784         } else {
3785                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3786                                  index, namebuf, len, key->type, error);
3787         }
3788         return 0;
3789 }
3790
3791 static void free_corrupt_block(struct cache_extent *cache)
3792 {
3793         struct btrfs_corrupt_block *corrupt;
3794
3795         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3796         free(corrupt);
3797 }
3798
3799 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3800
3801 /*
3802  * Repair the btree of the given root.
3803  *
3804  * The fix is to remove the node key in corrupt_blocks cache_tree.
3805  * and rebalance the tree.
3806  * After the fix, the btree should be writeable.
3807  */
3808 static int repair_btree(struct btrfs_root *root,
3809                         struct cache_tree *corrupt_blocks)
3810 {
3811         struct btrfs_trans_handle *trans;
3812         struct btrfs_path path;
3813         struct btrfs_corrupt_block *corrupt;
3814         struct cache_extent *cache;
3815         struct btrfs_key key;
3816         u64 offset;
3817         int level;
3818         int ret = 0;
3819
3820         if (cache_tree_empty(corrupt_blocks))
3821                 return 0;
3822
3823         trans = btrfs_start_transaction(root, 1);
3824         if (IS_ERR(trans)) {
3825                 ret = PTR_ERR(trans);
3826                 fprintf(stderr, "Error starting transaction: %s\n",
3827                         strerror(-ret));
3828                 return ret;
3829         }
3830         btrfs_init_path(&path);
3831         cache = first_cache_extent(corrupt_blocks);
3832         while (cache) {
3833                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3834                                        cache);
3835                 level = corrupt->level;
3836                 path.lowest_level = level;
3837                 key.objectid = corrupt->key.objectid;
3838                 key.type = corrupt->key.type;
3839                 key.offset = corrupt->key.offset;
3840
3841                 /*
3842                  * Here we don't want to do any tree balance, since it may
3843                  * cause a balance with corrupted brother leaf/node,
3844                  * so ins_len set to 0 here.
3845                  * Balance will be done after all corrupt node/leaf is deleted.
3846                  */
3847                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3848                 if (ret < 0)
3849                         goto out;
3850                 offset = btrfs_node_blockptr(path.nodes[level],
3851                                              path.slots[level]);
3852
3853                 /* Remove the ptr */
3854                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3855                 if (ret < 0)
3856                         goto out;
3857                 /*
3858                  * Remove the corresponding extent
3859                  * return value is not concerned.
3860                  */
3861                 btrfs_release_path(&path);
3862                 ret = btrfs_free_extent(trans, root, offset,
3863                                 root->fs_info->nodesize, 0,
3864                                 root->root_key.objectid, level - 1, 0);
3865                 cache = next_cache_extent(cache);
3866         }
3867
3868         /* Balance the btree using btrfs_search_slot() */
3869         cache = first_cache_extent(corrupt_blocks);
3870         while (cache) {
3871                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3872                                        cache);
3873                 memcpy(&key, &corrupt->key, sizeof(key));
3874                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3875                 if (ret < 0)
3876                         goto out;
3877                 /* return will always >0 since it won't find the item */
3878                 ret = 0;
3879                 btrfs_release_path(&path);
3880                 cache = next_cache_extent(cache);
3881         }
3882 out:
3883         btrfs_commit_transaction(trans, root);
3884         btrfs_release_path(&path);
3885         return ret;
3886 }
3887
3888 static int check_fs_root(struct btrfs_root *root,
3889                          struct cache_tree *root_cache,
3890                          struct walk_control *wc)
3891 {
3892         int ret = 0;
3893         int err = 0;
3894         int wret;
3895         int level;
3896         struct btrfs_path path;
3897         struct shared_node root_node;
3898         struct root_record *rec;
3899         struct btrfs_root_item *root_item = &root->root_item;
3900         struct cache_tree corrupt_blocks;
3901         struct orphan_data_extent *orphan;
3902         struct orphan_data_extent *tmp;
3903         enum btrfs_tree_block_status status;
3904         struct node_refs nrefs;
3905
3906         /*
3907          * Reuse the corrupt_block cache tree to record corrupted tree block
3908          *
3909          * Unlike the usage in extent tree check, here we do it in a per
3910          * fs/subvol tree base.
3911          */
3912         cache_tree_init(&corrupt_blocks);
3913         root->fs_info->corrupt_blocks = &corrupt_blocks;
3914
3915         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3916                 rec = get_root_rec(root_cache, root->root_key.objectid);
3917                 BUG_ON(IS_ERR(rec));
3918                 if (btrfs_root_refs(root_item) > 0)
3919                         rec->found_root_item = 1;
3920         }
3921
3922         btrfs_init_path(&path);
3923         memset(&root_node, 0, sizeof(root_node));
3924         cache_tree_init(&root_node.root_cache);
3925         cache_tree_init(&root_node.inode_cache);
3926         memset(&nrefs, 0, sizeof(nrefs));
3927
3928         /* Move the orphan extent record to corresponding inode_record */
3929         list_for_each_entry_safe(orphan, tmp,
3930                                  &root->orphan_data_extents, list) {
3931                 struct inode_record *inode;
3932
3933                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3934                                       1);
3935                 BUG_ON(IS_ERR(inode));
3936                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3937                 list_move(&orphan->list, &inode->orphan_extents);
3938         }
3939
3940         level = btrfs_header_level(root->node);
3941         memset(wc->nodes, 0, sizeof(wc->nodes));
3942         wc->nodes[level] = &root_node;
3943         wc->active_node = level;
3944         wc->root_level = level;
3945
3946         /* We may not have checked the root block, lets do that now */
3947         if (btrfs_is_leaf(root->node))
3948                 status = btrfs_check_leaf(root, NULL, root->node);
3949         else
3950                 status = btrfs_check_node(root, NULL, root->node);
3951         if (status != BTRFS_TREE_BLOCK_CLEAN)
3952                 return -EIO;
3953
3954         if (btrfs_root_refs(root_item) > 0 ||
3955             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3956                 path.nodes[level] = root->node;
3957                 extent_buffer_get(root->node);
3958                 path.slots[level] = 0;
3959         } else {
3960                 struct btrfs_key key;
3961                 struct btrfs_disk_key found_key;
3962
3963                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3964                 level = root_item->drop_level;
3965                 path.lowest_level = level;
3966                 if (level > btrfs_header_level(root->node) ||
3967                     level >= BTRFS_MAX_LEVEL) {
3968                         error("ignoring invalid drop level: %u", level);
3969                         goto skip_walking;
3970                 }
3971                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3972                 if (wret < 0)
3973                         goto skip_walking;
3974                 btrfs_node_key(path.nodes[level], &found_key,
3975                                 path.slots[level]);
3976                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3977                                         sizeof(found_key)));
3978         }
3979
3980         while (1) {
3981                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3982                 if (wret < 0)
3983                         ret = wret;
3984                 if (wret != 0)
3985                         break;
3986
3987                 wret = walk_up_tree(root, &path, wc, &level);
3988                 if (wret < 0)
3989                         ret = wret;
3990                 if (wret != 0)
3991                         break;
3992         }
3993 skip_walking:
3994         btrfs_release_path(&path);
3995
3996         if (!cache_tree_empty(&corrupt_blocks)) {
3997                 struct cache_extent *cache;
3998                 struct btrfs_corrupt_block *corrupt;
3999
4000                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4001                        root->root_key.objectid);
4002                 cache = first_cache_extent(&corrupt_blocks);
4003                 while (cache) {
4004                         corrupt = container_of(cache,
4005                                                struct btrfs_corrupt_block,
4006                                                cache);
4007                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4008                                cache->start, corrupt->level,
4009                                corrupt->key.objectid, corrupt->key.type,
4010                                corrupt->key.offset);
4011                         cache = next_cache_extent(cache);
4012                 }
4013                 if (repair) {
4014                         printf("Try to repair the btree for root %llu\n",
4015                                root->root_key.objectid);
4016                         ret = repair_btree(root, &corrupt_blocks);
4017                         if (ret < 0)
4018                                 fprintf(stderr, "Failed to repair btree: %s\n",
4019                                         strerror(-ret));
4020                         if (!ret)
4021                                 printf("Btree for root %llu is fixed\n",
4022                                        root->root_key.objectid);
4023                 }
4024         }
4025
4026         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4027         if (err < 0)
4028                 ret = err;
4029
4030         if (root_node.current) {
4031                 root_node.current->checked = 1;
4032                 maybe_free_inode_rec(&root_node.inode_cache,
4033                                 root_node.current);
4034         }
4035
4036         err = check_inode_recs(root, &root_node.inode_cache);
4037         if (!ret)
4038                 ret = err;
4039
4040         free_corrupt_blocks_tree(&corrupt_blocks);
4041         root->fs_info->corrupt_blocks = NULL;
4042         free_orphan_data_extents(&root->orphan_data_extents);
4043         return ret;
4044 }
4045
4046 static int fs_root_objectid(u64 objectid)
4047 {
4048         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4049             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4050                 return 1;
4051         return is_fstree(objectid);
4052 }
4053
4054 static int check_fs_roots(struct btrfs_root *root,
4055                           struct cache_tree *root_cache)
4056 {
4057         struct btrfs_path path;
4058         struct btrfs_key key;
4059         struct walk_control wc;
4060         struct extent_buffer *leaf, *tree_node;
4061         struct btrfs_root *tmp_root;
4062         struct btrfs_root *tree_root = root->fs_info->tree_root;
4063         int ret;
4064         int err = 0;
4065
4066         if (ctx.progress_enabled) {
4067                 ctx.tp = TASK_FS_ROOTS;
4068                 task_start(ctx.info);
4069         }
4070
4071         /*
4072          * Just in case we made any changes to the extent tree that weren't
4073          * reflected into the free space cache yet.
4074          */
4075         if (repair)
4076                 reset_cached_block_groups(root->fs_info);
4077         memset(&wc, 0, sizeof(wc));
4078         cache_tree_init(&wc.shared);
4079         btrfs_init_path(&path);
4080
4081 again:
4082         key.offset = 0;
4083         key.objectid = 0;
4084         key.type = BTRFS_ROOT_ITEM_KEY;
4085         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4086         if (ret < 0) {
4087                 err = 1;
4088                 goto out;
4089         }
4090         tree_node = tree_root->node;
4091         while (1) {
4092                 if (tree_node != tree_root->node) {
4093                         free_root_recs_tree(root_cache);
4094                         btrfs_release_path(&path);
4095                         goto again;
4096                 }
4097                 leaf = path.nodes[0];
4098                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4099                         ret = btrfs_next_leaf(tree_root, &path);
4100                         if (ret) {
4101                                 if (ret < 0)
4102                                         err = 1;
4103                                 break;
4104                         }
4105                         leaf = path.nodes[0];
4106                 }
4107                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4108                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4109                     fs_root_objectid(key.objectid)) {
4110                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4111                                 tmp_root = btrfs_read_fs_root_no_cache(
4112                                                 root->fs_info, &key);
4113                         } else {
4114                                 key.offset = (u64)-1;
4115                                 tmp_root = btrfs_read_fs_root(
4116                                                 root->fs_info, &key);
4117                         }
4118                         if (IS_ERR(tmp_root)) {
4119                                 err = 1;
4120                                 goto next;
4121                         }
4122                         ret = check_fs_root(tmp_root, root_cache, &wc);
4123                         if (ret == -EAGAIN) {
4124                                 free_root_recs_tree(root_cache);
4125                                 btrfs_release_path(&path);
4126                                 goto again;
4127                         }
4128                         if (ret)
4129                                 err = 1;
4130                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4131                                 btrfs_free_fs_root(tmp_root);
4132                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4133                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4134                         process_root_ref(leaf, path.slots[0], &key,
4135                                          root_cache);
4136                 }
4137 next:
4138                 path.slots[0]++;
4139         }
4140 out:
4141         btrfs_release_path(&path);
4142         if (err)
4143                 free_extent_cache_tree(&wc.shared);
4144         if (!cache_tree_empty(&wc.shared))
4145                 fprintf(stderr, "warning line %d\n", __LINE__);
4146
4147         task_stop(ctx.info);
4148
4149         return err;
4150 }
4151
4152 /*
4153  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4154  * INODE_REF/INODE_EXTREF match.
4155  *
4156  * @root:       the root of the fs/file tree
4157  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4158  * @key:        the key of the DIR_ITEM/DIR_INDEX
4159  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4160  *              distinguish root_dir between normal dir/file
4161  * @name:       the name in the INODE_REF/INODE_EXTREF
4162  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4163  * @mode:       the st_mode of INODE_ITEM
4164  *
4165  * Return 0 if no error occurred.
4166  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4167  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4168  * dir/file.
4169  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4170  * not match for normal dir/file.
4171  */
4172 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4173                          struct btrfs_key *key, u64 index, char *name,
4174                          u32 namelen, u32 mode)
4175 {
4176         struct btrfs_path path;
4177         struct extent_buffer *node;
4178         struct btrfs_dir_item *di;
4179         struct btrfs_key location;
4180         char namebuf[BTRFS_NAME_LEN] = {0};
4181         u32 total;
4182         u32 cur = 0;
4183         u32 len;
4184         u32 name_len;
4185         u32 data_len;
4186         u8 filetype;
4187         int slot;
4188         int ret;
4189
4190         btrfs_init_path(&path);
4191         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4192         if (ret < 0) {
4193                 ret = DIR_ITEM_MISSING;
4194                 goto out;
4195         }
4196
4197         /* Process root dir and goto out*/
4198         if (index == 0) {
4199                 if (ret == 0) {
4200                         ret = ROOT_DIR_ERROR;
4201                         error(
4202                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4203                                 root->objectid,
4204                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4205                                         "REF" : "EXTREF",
4206                                 ref_key->objectid, ref_key->offset,
4207                                 key->type == BTRFS_DIR_ITEM_KEY ?
4208                                         "DIR_ITEM" : "DIR_INDEX");
4209                 } else {
4210                         ret = 0;
4211                 }
4212
4213                 goto out;
4214         }
4215
4216         /* Process normal file/dir */
4217         if (ret > 0) {
4218                 ret = DIR_ITEM_MISSING;
4219                 error(
4220                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4221                         root->objectid,
4222                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4223                         ref_key->objectid, ref_key->offset,
4224                         key->type == BTRFS_DIR_ITEM_KEY ?
4225                                 "DIR_ITEM" : "DIR_INDEX",
4226                         key->objectid, key->offset, namelen, name,
4227                         imode_to_type(mode));
4228                 goto out;
4229         }
4230
4231         /* Check whether inode_id/filetype/name match */
4232         node = path.nodes[0];
4233         slot = path.slots[0];
4234         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4235         total = btrfs_item_size_nr(node, slot);
4236         while (cur < total) {
4237                 ret = DIR_ITEM_MISMATCH;
4238                 name_len = btrfs_dir_name_len(node, di);
4239                 data_len = btrfs_dir_data_len(node, di);
4240
4241                 btrfs_dir_item_key_to_cpu(node, di, &location);
4242                 if (location.objectid != ref_key->objectid ||
4243                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4244                     location.offset != 0)
4245                         goto next;
4246
4247                 filetype = btrfs_dir_type(node, di);
4248                 if (imode_to_type(mode) != filetype)
4249                         goto next;
4250
4251                 if (cur + sizeof(*di) + name_len > total ||
4252                     name_len > BTRFS_NAME_LEN) {
4253                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4254                                 root->objectid,
4255                                 key->type == BTRFS_DIR_ITEM_KEY ?
4256                                 "DIR_ITEM" : "DIR_INDEX",
4257                                 key->objectid, key->offset, name_len);
4258
4259                         if (cur + sizeof(*di) > total)
4260                                 break;
4261                         len = min_t(u32, total - cur - sizeof(*di),
4262                                     BTRFS_NAME_LEN);
4263                 } else {
4264                         len = name_len;
4265                 }
4266
4267                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4268                 if (len != namelen || strncmp(namebuf, name, len))
4269                         goto next;
4270
4271                 ret = 0;
4272                 goto out;
4273 next:
4274                 len = sizeof(*di) + name_len + data_len;
4275                 di = (struct btrfs_dir_item *)((char *)di + len);
4276                 cur += len;
4277         }
4278         if (ret == DIR_ITEM_MISMATCH)
4279                 error(
4280                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4281                         root->objectid,
4282                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4283                         ref_key->objectid, ref_key->offset,
4284                         key->type == BTRFS_DIR_ITEM_KEY ?
4285                                 "DIR_ITEM" : "DIR_INDEX",
4286                         key->objectid, key->offset, namelen, name,
4287                         imode_to_type(mode));
4288 out:
4289         btrfs_release_path(&path);
4290         return ret;
4291 }
4292
4293 /*
4294  * Traverse the given INODE_REF and call find_dir_item() to find related
4295  * DIR_ITEM/DIR_INDEX.
4296  *
4297  * @root:       the root of the fs/file tree
4298  * @ref_key:    the key of the INODE_REF
4299  * @refs:       the count of INODE_REF
4300  * @mode:       the st_mode of INODE_ITEM
4301  *
4302  * Return 0 if no error occurred.
4303  */
4304 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4305                            struct extent_buffer *node, int slot, u64 *refs,
4306                            int mode)
4307 {
4308         struct btrfs_key key;
4309         struct btrfs_inode_ref *ref;
4310         char namebuf[BTRFS_NAME_LEN] = {0};
4311         u32 total;
4312         u32 cur = 0;
4313         u32 len;
4314         u32 name_len;
4315         u64 index;
4316         int ret, err = 0;
4317
4318         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4319         total = btrfs_item_size_nr(node, slot);
4320
4321 next:
4322         /* Update inode ref count */
4323         (*refs)++;
4324
4325         index = btrfs_inode_ref_index(node, ref);
4326         name_len = btrfs_inode_ref_name_len(node, ref);
4327         if (cur + sizeof(*ref) + name_len > total ||
4328             name_len > BTRFS_NAME_LEN) {
4329                 warning("root %llu INODE_REF[%llu %llu] name too long",
4330                         root->objectid, ref_key->objectid, ref_key->offset);
4331
4332                 if (total < cur + sizeof(*ref))
4333                         goto out;
4334                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4335         } else {
4336                 len = name_len;
4337         }
4338
4339         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4340
4341         /* Check root dir ref name */
4342         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4343                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4344                       root->objectid, ref_key->objectid, ref_key->offset,
4345                       namebuf);
4346                 err |= ROOT_DIR_ERROR;
4347         }
4348
4349         /* Find related DIR_INDEX */
4350         key.objectid = ref_key->offset;
4351         key.type = BTRFS_DIR_INDEX_KEY;
4352         key.offset = index;
4353         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4354         err |= ret;
4355
4356         /* Find related dir_item */
4357         key.objectid = ref_key->offset;
4358         key.type = BTRFS_DIR_ITEM_KEY;
4359         key.offset = btrfs_name_hash(namebuf, len);
4360         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4361         err |= ret;
4362
4363         len = sizeof(*ref) + name_len;
4364         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4365         cur += len;
4366         if (cur < total)
4367                 goto next;
4368
4369 out:
4370         return err;
4371 }
4372
4373 /*
4374  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4375  * DIR_ITEM/DIR_INDEX.
4376  *
4377  * @root:       the root of the fs/file tree
4378  * @ref_key:    the key of the INODE_EXTREF
4379  * @refs:       the count of INODE_EXTREF
4380  * @mode:       the st_mode of INODE_ITEM
4381  *
4382  * Return 0 if no error occurred.
4383  */
4384 static int check_inode_extref(struct btrfs_root *root,
4385                               struct btrfs_key *ref_key,
4386                               struct extent_buffer *node, int slot, u64 *refs,
4387                               int mode)
4388 {
4389         struct btrfs_key key;
4390         struct btrfs_inode_extref *extref;
4391         char namebuf[BTRFS_NAME_LEN] = {0};
4392         u32 total;
4393         u32 cur = 0;
4394         u32 len;
4395         u32 name_len;
4396         u64 index;
4397         u64 parent;
4398         int ret;
4399         int err = 0;
4400
4401         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4402         total = btrfs_item_size_nr(node, slot);
4403
4404 next:
4405         /* update inode ref count */
4406         (*refs)++;
4407         name_len = btrfs_inode_extref_name_len(node, extref);
4408         index = btrfs_inode_extref_index(node, extref);
4409         parent = btrfs_inode_extref_parent(node, extref);
4410         if (name_len <= BTRFS_NAME_LEN) {
4411                 len = name_len;
4412         } else {
4413                 len = BTRFS_NAME_LEN;
4414                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4415                         root->objectid, ref_key->objectid, ref_key->offset);
4416         }
4417         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4418
4419         /* Check root dir ref name */
4420         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4421                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4422                       root->objectid, ref_key->objectid, ref_key->offset,
4423                       namebuf);
4424                 err |= ROOT_DIR_ERROR;
4425         }
4426
4427         /* find related dir_index */
4428         key.objectid = parent;
4429         key.type = BTRFS_DIR_INDEX_KEY;
4430         key.offset = index;
4431         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4432         err |= ret;
4433
4434         /* find related dir_item */
4435         key.objectid = parent;
4436         key.type = BTRFS_DIR_ITEM_KEY;
4437         key.offset = btrfs_name_hash(namebuf, len);
4438         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4439         err |= ret;
4440
4441         len = sizeof(*extref) + name_len;
4442         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4443         cur += len;
4444
4445         if (cur < total)
4446                 goto next;
4447
4448         return err;
4449 }
4450
4451 /*
4452  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4453  * DIR_ITEM/DIR_INDEX match.
4454  *
4455  * @root:       the root of the fs/file tree
4456  * @key:        the key of the INODE_REF/INODE_EXTREF
4457  * @name:       the name in the INODE_REF/INODE_EXTREF
4458  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4459  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4460  * to (u64)-1
4461  * @ext_ref:    the EXTENDED_IREF feature
4462  *
4463  * Return 0 if no error occurred.
4464  * Return >0 for error bitmap
4465  */
4466 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4467                           char *name, int namelen, u64 index,
4468                           unsigned int ext_ref)
4469 {
4470         struct btrfs_path path;
4471         struct btrfs_inode_ref *ref;
4472         struct btrfs_inode_extref *extref;
4473         struct extent_buffer *node;
4474         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4475         u32 total;
4476         u32 cur = 0;
4477         u32 len;
4478         u32 ref_namelen;
4479         u64 ref_index;
4480         u64 parent;
4481         u64 dir_id;
4482         int slot;
4483         int ret;
4484
4485         btrfs_init_path(&path);
4486         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4487         if (ret) {
4488                 ret = INODE_REF_MISSING;
4489                 goto extref;
4490         }
4491
4492         node = path.nodes[0];
4493         slot = path.slots[0];
4494
4495         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4496         total = btrfs_item_size_nr(node, slot);
4497
4498         /* Iterate all entry of INODE_REF */
4499         while (cur < total) {
4500                 ret = INODE_REF_MISSING;
4501
4502                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4503                 ref_index = btrfs_inode_ref_index(node, ref);
4504                 if (index != (u64)-1 && index != ref_index)
4505                         goto next_ref;
4506
4507                 if (cur + sizeof(*ref) + ref_namelen > total ||
4508                     ref_namelen > BTRFS_NAME_LEN) {
4509                         warning("root %llu INODE %s[%llu %llu] name too long",
4510                                 root->objectid,
4511                                 key->type == BTRFS_INODE_REF_KEY ?
4512                                         "REF" : "EXTREF",
4513                                 key->objectid, key->offset);
4514
4515                         if (cur + sizeof(*ref) > total)
4516                                 break;
4517                         len = min_t(u32, total - cur - sizeof(*ref),
4518                                     BTRFS_NAME_LEN);
4519                 } else {
4520                         len = ref_namelen;
4521                 }
4522
4523                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4524                                    len);
4525
4526                 if (len != namelen || strncmp(ref_namebuf, name, len))
4527                         goto next_ref;
4528
4529                 ret = 0;
4530                 goto out;
4531 next_ref:
4532                 len = sizeof(*ref) + ref_namelen;
4533                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4534                 cur += len;
4535         }
4536
4537 extref:
4538         /* Skip if not support EXTENDED_IREF feature */
4539         if (!ext_ref)
4540                 goto out;
4541
4542         btrfs_release_path(&path);
4543         btrfs_init_path(&path);
4544
4545         dir_id = key->offset;
4546         key->type = BTRFS_INODE_EXTREF_KEY;
4547         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4548
4549         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4550         if (ret) {
4551                 ret = INODE_REF_MISSING;
4552                 goto out;
4553         }
4554
4555         node = path.nodes[0];
4556         slot = path.slots[0];
4557
4558         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4559         cur = 0;
4560         total = btrfs_item_size_nr(node, slot);
4561
4562         /* Iterate all entry of INODE_EXTREF */
4563         while (cur < total) {
4564                 ret = INODE_REF_MISSING;
4565
4566                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4567                 ref_index = btrfs_inode_extref_index(node, extref);
4568                 parent = btrfs_inode_extref_parent(node, extref);
4569                 if (index != (u64)-1 && index != ref_index)
4570                         goto next_extref;
4571
4572                 if (parent != dir_id)
4573                         goto next_extref;
4574
4575                 if (ref_namelen <= BTRFS_NAME_LEN) {
4576                         len = ref_namelen;
4577                 } else {
4578                         len = BTRFS_NAME_LEN;
4579                         warning("root %llu INODE %s[%llu %llu] name too long",
4580                                 root->objectid,
4581                                 key->type == BTRFS_INODE_REF_KEY ?
4582                                         "REF" : "EXTREF",
4583                                 key->objectid, key->offset);
4584                 }
4585                 read_extent_buffer(node, ref_namebuf,
4586                                    (unsigned long)(extref + 1), len);
4587
4588                 if (len != namelen || strncmp(ref_namebuf, name, len))
4589                         goto next_extref;
4590
4591                 ret = 0;
4592                 goto out;
4593
4594 next_extref:
4595                 len = sizeof(*extref) + ref_namelen;
4596                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4597                 cur += len;
4598
4599         }
4600 out:
4601         btrfs_release_path(&path);
4602         return ret;
4603 }
4604
4605 /*
4606  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4607  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4608  *
4609  * @root:       the root of the fs/file tree
4610  * @key:        the key of the INODE_REF/INODE_EXTREF
4611  * @size:       the st_size of the INODE_ITEM
4612  * @ext_ref:    the EXTENDED_IREF feature
4613  *
4614  * Return 0 if no error occurred.
4615  */
4616 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4617                           struct extent_buffer *node, int slot, u64 *size,
4618                           unsigned int ext_ref)
4619 {
4620         struct btrfs_dir_item *di;
4621         struct btrfs_inode_item *ii;
4622         struct btrfs_path path;
4623         struct btrfs_key location;
4624         char namebuf[BTRFS_NAME_LEN] = {0};
4625         u32 total;
4626         u32 cur = 0;
4627         u32 len;
4628         u32 name_len;
4629         u32 data_len;
4630         u8 filetype;
4631         u32 mode;
4632         u64 index;
4633         int ret;
4634         int err = 0;
4635
4636         /*
4637          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4638          * ignore index check.
4639          */
4640         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4641
4642         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4643         total = btrfs_item_size_nr(node, slot);
4644
4645         while (cur < total) {
4646                 data_len = btrfs_dir_data_len(node, di);
4647                 if (data_len)
4648                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4649                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650                               "DIR_ITEM" : "DIR_INDEX",
4651                               key->objectid, key->offset, data_len);
4652
4653                 name_len = btrfs_dir_name_len(node, di);
4654                 if (cur + sizeof(*di) + name_len > total ||
4655                     name_len > BTRFS_NAME_LEN) {
4656                         warning("root %llu %s[%llu %llu] name too long",
4657                                 root->objectid,
4658                                 key->type == BTRFS_DIR_ITEM_KEY ?
4659                                 "DIR_ITEM" : "DIR_INDEX",
4660                                 key->objectid, key->offset);
4661
4662                         if (cur + sizeof(*di) > total)
4663                                 break;
4664                         len = min_t(u32, total - cur - sizeof(*di),
4665                                     BTRFS_NAME_LEN);
4666                 } else {
4667                         len = name_len;
4668                 }
4669                 (*size) += name_len;
4670
4671                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4672                 filetype = btrfs_dir_type(node, di);
4673
4674                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4675                     key->offset != btrfs_name_hash(namebuf, len)) {
4676                         err |= -EIO;
4677                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4678                                 root->objectid, key->objectid, key->offset,
4679                                 namebuf, len, filetype, key->offset,
4680                                 btrfs_name_hash(namebuf, len));
4681                 }
4682
4683                 btrfs_init_path(&path);
4684                 btrfs_dir_item_key_to_cpu(node, di, &location);
4685
4686                 /* Ignore related ROOT_ITEM check */
4687                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4688                         goto next;
4689
4690                 /* Check relative INODE_ITEM(existence/filetype) */
4691                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4692                 if (ret) {
4693                         err |= INODE_ITEM_MISSING;
4694                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4695                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4696                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4697                               key->offset, location.objectid, name_len,
4698                               namebuf, filetype);
4699                         goto next;
4700                 }
4701
4702                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4703                                     struct btrfs_inode_item);
4704                 mode = btrfs_inode_mode(path.nodes[0], ii);
4705
4706                 if (imode_to_type(mode) != filetype) {
4707                         err |= INODE_ITEM_MISMATCH;
4708                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4709                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4710                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4711                               key->offset, name_len, namebuf, filetype);
4712                 }
4713
4714                 /* Check relative INODE_REF/INODE_EXTREF */
4715                 location.type = BTRFS_INODE_REF_KEY;
4716                 location.offset = key->objectid;
4717                 ret = find_inode_ref(root, &location, namebuf, len,
4718                                        index, ext_ref);
4719                 err |= ret;
4720                 if (ret & INODE_REF_MISSING)
4721                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4722                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4723                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4724                               key->offset, name_len, namebuf, filetype);
4725
4726 next:
4727                 btrfs_release_path(&path);
4728                 len = sizeof(*di) + name_len + data_len;
4729                 di = (struct btrfs_dir_item *)((char *)di + len);
4730                 cur += len;
4731
4732                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4733                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4734                               root->objectid, key->objectid, key->offset);
4735                         break;
4736                 }
4737         }
4738
4739         return err;
4740 }
4741
4742 /*
4743  * Check file extent datasum/hole, update the size of the file extents,
4744  * check and update the last offset of the file extent.
4745  *
4746  * @root:       the root of fs/file tree.
4747  * @fkey:       the key of the file extent.
4748  * @nodatasum:  INODE_NODATASUM feature.
4749  * @size:       the sum of all EXTENT_DATA items size for this inode.
4750  * @end:        the offset of the last extent.
4751  *
4752  * Return 0 if no error occurred.
4753  */
4754 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4755                              struct extent_buffer *node, int slot,
4756                              unsigned int nodatasum, u64 *size, u64 *end)
4757 {
4758         struct btrfs_file_extent_item *fi;
4759         u64 disk_bytenr;
4760         u64 disk_num_bytes;
4761         u64 extent_num_bytes;
4762         u64 extent_offset;
4763         u64 csum_found;         /* In byte size, sectorsize aligned */
4764         u64 search_start;       /* Logical range start we search for csum */
4765         u64 search_len;         /* Logical range len we search for csum */
4766         unsigned int extent_type;
4767         unsigned int is_hole;
4768         int compressed = 0;
4769         int ret;
4770         int err = 0;
4771
4772         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4773
4774         /* Check inline extent */
4775         extent_type = btrfs_file_extent_type(node, fi);
4776         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4777                 struct btrfs_item *e = btrfs_item_nr(slot);
4778                 u32 item_inline_len;
4779
4780                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4781                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4782                 compressed = btrfs_file_extent_compression(node, fi);
4783                 if (extent_num_bytes == 0) {
4784                         error(
4785                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4786                                 root->objectid, fkey->objectid, fkey->offset);
4787                         err |= FILE_EXTENT_ERROR;
4788                 }
4789                 if (!compressed && extent_num_bytes != item_inline_len) {
4790                         error(
4791                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4792                                 root->objectid, fkey->objectid, fkey->offset,
4793                                 extent_num_bytes, item_inline_len);
4794                         err |= FILE_EXTENT_ERROR;
4795                 }
4796                 *end += extent_num_bytes;
4797                 *size += extent_num_bytes;
4798                 return err;
4799         }
4800
4801         /* Check extent type */
4802         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4803                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4804                 err |= FILE_EXTENT_ERROR;
4805                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4806                       root->objectid, fkey->objectid, fkey->offset);
4807                 return err;
4808         }
4809
4810         /* Check REG_EXTENT/PREALLOC_EXTENT */
4811         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4812         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4813         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4814         extent_offset = btrfs_file_extent_offset(node, fi);
4815         compressed = btrfs_file_extent_compression(node, fi);
4816         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4817
4818         /*
4819          * Check EXTENT_DATA csum
4820          *
4821          * For plain (uncompressed) extent, we should only check the range
4822          * we're referring to, as it's possible that part of prealloc extent
4823          * has been written, and has csum:
4824          *
4825          * |<--- Original large preallocated extent A ---->|
4826          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4827          *      No csum                         Has csum
4828          *
4829          * For compressed extent, we should check the whole range.
4830          */
4831         if (!compressed) {
4832                 search_start = disk_bytenr + extent_offset;
4833                 search_len = extent_num_bytes;
4834         } else {
4835                 search_start = disk_bytenr;
4836                 search_len = disk_num_bytes;
4837         }
4838         ret = count_csum_range(root, search_start, search_len, &csum_found);
4839         if (csum_found > 0 && nodatasum) {
4840                 err |= ODD_CSUM_ITEM;
4841                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4842                       root->objectid, fkey->objectid, fkey->offset);
4843         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4844                    !is_hole && (ret < 0 || csum_found < search_len)) {
4845                 err |= CSUM_ITEM_MISSING;
4846                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4847                       root->objectid, fkey->objectid, fkey->offset,
4848                       csum_found, search_len);
4849         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4850                 err |= ODD_CSUM_ITEM;
4851                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4852                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4853         }
4854
4855         /* Check EXTENT_DATA hole */
4856         if (!no_holes && *end != fkey->offset) {
4857                 err |= FILE_EXTENT_ERROR;
4858                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4859                       root->objectid, fkey->objectid, fkey->offset);
4860         }
4861
4862         *end += extent_num_bytes;
4863         if (!is_hole)
4864                 *size += extent_num_bytes;
4865
4866         return err;
4867 }
4868
4869 /*
4870  * Check INODE_ITEM and related ITEMs (the same inode number)
4871  * 1. check link count
4872  * 2. check inode ref/extref
4873  * 3. check dir item/index
4874  *
4875  * @ext_ref:    the EXTENDED_IREF feature
4876  *
4877  * Return 0 if no error occurred.
4878  * Return >0 for error or hit the traversal is done(by error bitmap)
4879  */
4880 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4881                             unsigned int ext_ref)
4882 {
4883         struct extent_buffer *node;
4884         struct btrfs_inode_item *ii;
4885         struct btrfs_key key;
4886         u64 inode_id;
4887         u32 mode;
4888         u64 nlink;
4889         u64 nbytes;
4890         u64 isize;
4891         u64 size = 0;
4892         u64 refs = 0;
4893         u64 extent_end = 0;
4894         u64 extent_size = 0;
4895         unsigned int dir;
4896         unsigned int nodatasum;
4897         int slot;
4898         int ret;
4899         int err = 0;
4900
4901         node = path->nodes[0];
4902         slot = path->slots[0];
4903
4904         btrfs_item_key_to_cpu(node, &key, slot);
4905         inode_id = key.objectid;
4906
4907         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4908                 ret = btrfs_next_item(root, path);
4909                 if (ret > 0)
4910                         err |= LAST_ITEM;
4911                 return err;
4912         }
4913
4914         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4915         isize = btrfs_inode_size(node, ii);
4916         nbytes = btrfs_inode_nbytes(node, ii);
4917         mode = btrfs_inode_mode(node, ii);
4918         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4919         nlink = btrfs_inode_nlink(node, ii);
4920         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4921
4922         while (1) {
4923                 ret = btrfs_next_item(root, path);
4924                 if (ret < 0) {
4925                         /* out will fill 'err' rusing current statistics */
4926                         goto out;
4927                 } else if (ret > 0) {
4928                         err |= LAST_ITEM;
4929                         goto out;
4930                 }
4931
4932                 node = path->nodes[0];
4933                 slot = path->slots[0];
4934                 btrfs_item_key_to_cpu(node, &key, slot);
4935                 if (key.objectid != inode_id)
4936                         goto out;
4937
4938                 switch (key.type) {
4939                 case BTRFS_INODE_REF_KEY:
4940                         ret = check_inode_ref(root, &key, node, slot, &refs,
4941                                               mode);
4942                         err |= ret;
4943                         break;
4944                 case BTRFS_INODE_EXTREF_KEY:
4945                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4946                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4947                                         root->objectid, key.objectid,
4948                                         key.offset);
4949                         ret = check_inode_extref(root, &key, node, slot, &refs,
4950                                                  mode);
4951                         err |= ret;
4952                         break;
4953                 case BTRFS_DIR_ITEM_KEY:
4954                 case BTRFS_DIR_INDEX_KEY:
4955                         if (!dir) {
4956                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4957                                         root->objectid, inode_id,
4958                                         imode_to_type(mode), key.objectid,
4959                                         key.offset);
4960                         }
4961                         ret = check_dir_item(root, &key, node, slot, &size,
4962                                              ext_ref);
4963                         err |= ret;
4964                         break;
4965                 case BTRFS_EXTENT_DATA_KEY:
4966                         if (dir) {
4967                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4968                                         root->objectid, inode_id, key.objectid,
4969                                         key.offset);
4970                         }
4971                         ret = check_file_extent(root, &key, node, slot,
4972                                                 nodatasum, &extent_size,
4973                                                 &extent_end);
4974                         err |= ret;
4975                         break;
4976                 case BTRFS_XATTR_ITEM_KEY:
4977                         break;
4978                 default:
4979                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4980                               key.objectid, key.type, key.offset);
4981                 }
4982         }
4983
4984 out:
4985         /* verify INODE_ITEM nlink/isize/nbytes */
4986         if (dir) {
4987                 if (nlink != 1) {
4988                         err |= LINK_COUNT_ERROR;
4989                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4990                               root->objectid, inode_id, nlink);
4991                 }
4992
4993                 /*
4994                  * Just a warning, as dir inode nbytes is just an
4995                  * instructive value.
4996                  */
4997                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4998                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4999                                 root->objectid, inode_id,
5000                                 root->fs_info->nodesize);
5001                 }
5002
5003                 if (isize != size) {
5004                         err |= ISIZE_ERROR;
5005                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5006                               root->objectid, inode_id, isize, size);
5007                 }
5008         } else {
5009                 if (nlink != refs) {
5010                         err |= LINK_COUNT_ERROR;
5011                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5012                               root->objectid, inode_id, nlink, refs);
5013                 } else if (!nlink) {
5014                         err |= ORPHAN_ITEM;
5015                 }
5016
5017                 if (!nbytes && !no_holes && extent_end < isize) {
5018                         err |= NBYTES_ERROR;
5019                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5020                               root->objectid, inode_id, isize);
5021                 }
5022
5023                 if (nbytes != extent_size) {
5024                         err |= NBYTES_ERROR;
5025                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5026                               root->objectid, inode_id, nbytes, extent_size);
5027                 }
5028         }
5029
5030         return err;
5031 }
5032
5033 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5034 {
5035         struct btrfs_path path;
5036         struct btrfs_key key;
5037         int err = 0;
5038         int ret;
5039
5040         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5041         key.type = BTRFS_INODE_ITEM_KEY;
5042         key.offset = 0;
5043
5044         /* For root being dropped, we don't need to check first inode */
5045         if (btrfs_root_refs(&root->root_item) == 0 &&
5046             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5047             key.objectid)
5048                 return 0;
5049
5050         btrfs_init_path(&path);
5051
5052         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5053         if (ret < 0)
5054                 goto out;
5055         if (ret > 0) {
5056                 ret = 0;
5057                 err |= INODE_ITEM_MISSING;
5058                 error("first inode item of root %llu is missing",
5059                       root->objectid);
5060         }
5061
5062         err |= check_inode_item(root, &path, ext_ref);
5063         err &= ~LAST_ITEM;
5064         if (err && !ret)
5065                 ret = -EIO;
5066 out:
5067         btrfs_release_path(&path);
5068         return ret;
5069 }
5070
5071 /*
5072  * Iterate all item on the tree and call check_inode_item() to check.
5073  *
5074  * @root:       the root of the tree to be checked.
5075  * @ext_ref:    the EXTENDED_IREF feature
5076  *
5077  * Return 0 if no error found.
5078  * Return <0 for error.
5079  */
5080 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5081 {
5082         struct btrfs_path path;
5083         struct node_refs nrefs;
5084         struct btrfs_root_item *root_item = &root->root_item;
5085         int ret;
5086         int level;
5087         int err = 0;
5088
5089         /*
5090          * We need to manually check the first inode item(256)
5091          * As the following traversal function will only start from
5092          * the first inode item in the leaf, if inode item(256) is missing
5093          * we will just skip it forever.
5094          */
5095         ret = check_fs_first_inode(root, ext_ref);
5096         if (ret < 0)
5097                 return ret;
5098
5099         memset(&nrefs, 0, sizeof(nrefs));
5100         level = btrfs_header_level(root->node);
5101         btrfs_init_path(&path);
5102
5103         if (btrfs_root_refs(root_item) > 0 ||
5104             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5105                 path.nodes[level] = root->node;
5106                 path.slots[level] = 0;
5107                 extent_buffer_get(root->node);
5108         } else {
5109                 struct btrfs_key key;
5110
5111                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5112                 level = root_item->drop_level;
5113                 path.lowest_level = level;
5114                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5115                 if (ret < 0)
5116                         goto out;
5117                 ret = 0;
5118         }
5119
5120         while (1) {
5121                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5122                 err |= !!ret;
5123
5124                 /* if ret is negative, walk shall stop */
5125                 if (ret < 0) {
5126                         ret = err;
5127                         break;
5128                 }
5129
5130                 ret = walk_up_tree_v2(root, &path, &level);
5131                 if (ret != 0) {
5132                         /* Normal exit, reset ret to err */
5133                         ret = err;
5134                         break;
5135                 }
5136         }
5137
5138 out:
5139         btrfs_release_path(&path);
5140         return ret;
5141 }
5142
5143 /*
5144  * Find the relative ref for root_ref and root_backref.
5145  *
5146  * @root:       the root of the root tree.
5147  * @ref_key:    the key of the root ref.
5148  *
5149  * Return 0 if no error occurred.
5150  */
5151 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5152                           struct extent_buffer *node, int slot)
5153 {
5154         struct btrfs_path path;
5155         struct btrfs_key key;
5156         struct btrfs_root_ref *ref;
5157         struct btrfs_root_ref *backref;
5158         char ref_name[BTRFS_NAME_LEN] = {0};
5159         char backref_name[BTRFS_NAME_LEN] = {0};
5160         u64 ref_dirid;
5161         u64 ref_seq;
5162         u32 ref_namelen;
5163         u64 backref_dirid;
5164         u64 backref_seq;
5165         u32 backref_namelen;
5166         u32 len;
5167         int ret;
5168         int err = 0;
5169
5170         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5171         ref_dirid = btrfs_root_ref_dirid(node, ref);
5172         ref_seq = btrfs_root_ref_sequence(node, ref);
5173         ref_namelen = btrfs_root_ref_name_len(node, ref);
5174
5175         if (ref_namelen <= BTRFS_NAME_LEN) {
5176                 len = ref_namelen;
5177         } else {
5178                 len = BTRFS_NAME_LEN;
5179                 warning("%s[%llu %llu] ref_name too long",
5180                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5181                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5182                         ref_key->offset);
5183         }
5184         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5185
5186         /* Find relative root_ref */
5187         key.objectid = ref_key->offset;
5188         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5189         key.offset = ref_key->objectid;
5190
5191         btrfs_init_path(&path);
5192         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5193         if (ret) {
5194                 err |= ROOT_REF_MISSING;
5195                 error("%s[%llu %llu] couldn't find relative ref",
5196                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5197                       "ROOT_REF" : "ROOT_BACKREF",
5198                       ref_key->objectid, ref_key->offset);
5199                 goto out;
5200         }
5201
5202         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5203                                  struct btrfs_root_ref);
5204         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5205         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5206         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5207
5208         if (backref_namelen <= BTRFS_NAME_LEN) {
5209                 len = backref_namelen;
5210         } else {
5211                 len = BTRFS_NAME_LEN;
5212                 warning("%s[%llu %llu] ref_name too long",
5213                         key.type == BTRFS_ROOT_REF_KEY ?
5214                         "ROOT_REF" : "ROOT_BACKREF",
5215                         key.objectid, key.offset);
5216         }
5217         read_extent_buffer(path.nodes[0], backref_name,
5218                            (unsigned long)(backref + 1), len);
5219
5220         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5221             ref_namelen != backref_namelen ||
5222             strncmp(ref_name, backref_name, len)) {
5223                 err |= ROOT_REF_MISMATCH;
5224                 error("%s[%llu %llu] mismatch relative ref",
5225                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5226                       "ROOT_REF" : "ROOT_BACKREF",
5227                       ref_key->objectid, ref_key->offset);
5228         }
5229 out:
5230         btrfs_release_path(&path);
5231         return err;
5232 }
5233
5234 /*
5235  * Check all fs/file tree in low_memory mode.
5236  *
5237  * 1. for fs tree root item, call check_fs_root_v2()
5238  * 2. for fs tree root ref/backref, call check_root_ref()
5239  *
5240  * Return 0 if no error occurred.
5241  */
5242 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5243 {
5244         struct btrfs_root *tree_root = fs_info->tree_root;
5245         struct btrfs_root *cur_root = NULL;
5246         struct btrfs_path path;
5247         struct btrfs_key key;
5248         struct extent_buffer *node;
5249         unsigned int ext_ref;
5250         int slot;
5251         int ret;
5252         int err = 0;
5253
5254         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5255
5256         btrfs_init_path(&path);
5257         key.objectid = BTRFS_FS_TREE_OBJECTID;
5258         key.offset = 0;
5259         key.type = BTRFS_ROOT_ITEM_KEY;
5260
5261         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5262         if (ret < 0) {
5263                 err = ret;
5264                 goto out;
5265         } else if (ret > 0) {
5266                 err = -ENOENT;
5267                 goto out;
5268         }
5269
5270         while (1) {
5271                 node = path.nodes[0];
5272                 slot = path.slots[0];
5273                 btrfs_item_key_to_cpu(node, &key, slot);
5274                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5275                         goto out;
5276                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5277                     fs_root_objectid(key.objectid)) {
5278                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5279                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5280                                                                        &key);
5281                         } else {
5282                                 key.offset = (u64)-1;
5283                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5284                         }
5285
5286                         if (IS_ERR(cur_root)) {
5287                                 error("Fail to read fs/subvol tree: %lld",
5288                                       key.objectid);
5289                                 err = -EIO;
5290                                 goto next;
5291                         }
5292
5293                         ret = check_fs_root_v2(cur_root, ext_ref);
5294                         err |= ret;
5295
5296                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5297                                 btrfs_free_fs_root(cur_root);
5298                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5299                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5300                         ret = check_root_ref(tree_root, &key, node, slot);
5301                         err |= ret;
5302                 }
5303 next:
5304                 ret = btrfs_next_item(tree_root, &path);
5305                 if (ret > 0)
5306                         goto out;
5307                 if (ret < 0) {
5308                         err = ret;
5309                         goto out;
5310                 }
5311         }
5312
5313 out:
5314         btrfs_release_path(&path);
5315         return err;
5316 }
5317
5318 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5319 {
5320         struct list_head *cur = rec->backrefs.next;
5321         struct extent_backref *back;
5322         struct tree_backref *tback;
5323         struct data_backref *dback;
5324         u64 found = 0;
5325         int err = 0;
5326
5327         while(cur != &rec->backrefs) {
5328                 back = to_extent_backref(cur);
5329                 cur = cur->next;
5330                 if (!back->found_extent_tree) {
5331                         err = 1;
5332                         if (!print_errs)
5333                                 goto out;
5334                         if (back->is_data) {
5335                                 dback = to_data_backref(back);
5336                                 fprintf(stderr, "Backref %llu %s %llu"
5337                                         " owner %llu offset %llu num_refs %lu"
5338                                         " not found in extent tree\n",
5339                                         (unsigned long long)rec->start,
5340                                         back->full_backref ?
5341                                         "parent" : "root",
5342                                         back->full_backref ?
5343                                         (unsigned long long)dback->parent:
5344                                         (unsigned long long)dback->root,
5345                                         (unsigned long long)dback->owner,
5346                                         (unsigned long long)dback->offset,
5347                                         (unsigned long)dback->num_refs);
5348                         } else {
5349                                 tback = to_tree_backref(back);
5350                                 fprintf(stderr, "Backref %llu parent %llu"
5351                                         " root %llu not found in extent tree\n",
5352                                         (unsigned long long)rec->start,
5353                                         (unsigned long long)tback->parent,
5354                                         (unsigned long long)tback->root);
5355                         }
5356                 }
5357                 if (!back->is_data && !back->found_ref) {
5358                         err = 1;
5359                         if (!print_errs)
5360                                 goto out;
5361                         tback = to_tree_backref(back);
5362                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5363                                 (unsigned long long)rec->start,
5364                                 back->full_backref ? "parent" : "root",
5365                                 back->full_backref ?
5366                                 (unsigned long long)tback->parent :
5367                                 (unsigned long long)tback->root, back);
5368                 }
5369                 if (back->is_data) {
5370                         dback = to_data_backref(back);
5371                         if (dback->found_ref != dback->num_refs) {
5372                                 err = 1;
5373                                 if (!print_errs)
5374                                         goto out;
5375                                 fprintf(stderr, "Incorrect local backref count"
5376                                         " on %llu %s %llu owner %llu"
5377                                         " offset %llu found %u wanted %u back %p\n",
5378                                         (unsigned long long)rec->start,
5379                                         back->full_backref ?
5380                                         "parent" : "root",
5381                                         back->full_backref ?
5382                                         (unsigned long long)dback->parent:
5383                                         (unsigned long long)dback->root,
5384                                         (unsigned long long)dback->owner,
5385                                         (unsigned long long)dback->offset,
5386                                         dback->found_ref, dback->num_refs, back);
5387                         }
5388                         if (dback->disk_bytenr != rec->start) {
5389                                 err = 1;
5390                                 if (!print_errs)
5391                                         goto out;
5392                                 fprintf(stderr, "Backref disk bytenr does not"
5393                                         " match extent record, bytenr=%llu, "
5394                                         "ref bytenr=%llu\n",
5395                                         (unsigned long long)rec->start,
5396                                         (unsigned long long)dback->disk_bytenr);
5397                         }
5398
5399                         if (dback->bytes != rec->nr) {
5400                                 err = 1;
5401                                 if (!print_errs)
5402                                         goto out;
5403                                 fprintf(stderr, "Backref bytes do not match "
5404                                         "extent backref, bytenr=%llu, ref "
5405                                         "bytes=%llu, backref bytes=%llu\n",
5406                                         (unsigned long long)rec->start,
5407                                         (unsigned long long)rec->nr,
5408                                         (unsigned long long)dback->bytes);
5409                         }
5410                 }
5411                 if (!back->is_data) {
5412                         found += 1;
5413                 } else {
5414                         dback = to_data_backref(back);
5415                         found += dback->found_ref;
5416                 }
5417         }
5418         if (found != rec->refs) {
5419                 err = 1;
5420                 if (!print_errs)
5421                         goto out;
5422                 fprintf(stderr, "Incorrect global backref count "
5423                         "on %llu found %llu wanted %llu\n",
5424                         (unsigned long long)rec->start,
5425                         (unsigned long long)found,
5426                         (unsigned long long)rec->refs);
5427         }
5428 out:
5429         return err;
5430 }
5431
5432 static int free_all_extent_backrefs(struct extent_record *rec)
5433 {
5434         struct extent_backref *back;
5435         struct list_head *cur;
5436         while (!list_empty(&rec->backrefs)) {
5437                 cur = rec->backrefs.next;
5438                 back = to_extent_backref(cur);
5439                 list_del(cur);
5440                 free(back);
5441         }
5442         return 0;
5443 }
5444
5445 static void free_extent_record_cache(struct cache_tree *extent_cache)
5446 {
5447         struct cache_extent *cache;
5448         struct extent_record *rec;
5449
5450         while (1) {
5451                 cache = first_cache_extent(extent_cache);
5452                 if (!cache)
5453                         break;
5454                 rec = container_of(cache, struct extent_record, cache);
5455                 remove_cache_extent(extent_cache, cache);
5456                 free_all_extent_backrefs(rec);
5457                 free(rec);
5458         }
5459 }
5460
5461 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5462                                  struct extent_record *rec)
5463 {
5464         if (rec->content_checked && rec->owner_ref_checked &&
5465             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5466             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5467             !rec->bad_full_backref && !rec->crossing_stripes &&
5468             !rec->wrong_chunk_type) {
5469                 remove_cache_extent(extent_cache, &rec->cache);
5470                 free_all_extent_backrefs(rec);
5471                 list_del_init(&rec->list);
5472                 free(rec);
5473         }
5474         return 0;
5475 }
5476
5477 static int check_owner_ref(struct btrfs_root *root,
5478                             struct extent_record *rec,
5479                             struct extent_buffer *buf)
5480 {
5481         struct extent_backref *node;
5482         struct tree_backref *back;
5483         struct btrfs_root *ref_root;
5484         struct btrfs_key key;
5485         struct btrfs_path path;
5486         struct extent_buffer *parent;
5487         int level;
5488         int found = 0;
5489         int ret;
5490
5491         list_for_each_entry(node, &rec->backrefs, list) {
5492                 if (node->is_data)
5493                         continue;
5494                 if (!node->found_ref)
5495                         continue;
5496                 if (node->full_backref)
5497                         continue;
5498                 back = to_tree_backref(node);
5499                 if (btrfs_header_owner(buf) == back->root)
5500                         return 0;
5501         }
5502         BUG_ON(rec->is_root);
5503
5504         /* try to find the block by search corresponding fs tree */
5505         key.objectid = btrfs_header_owner(buf);
5506         key.type = BTRFS_ROOT_ITEM_KEY;
5507         key.offset = (u64)-1;
5508
5509         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5510         if (IS_ERR(ref_root))
5511                 return 1;
5512
5513         level = btrfs_header_level(buf);
5514         if (level == 0)
5515                 btrfs_item_key_to_cpu(buf, &key, 0);
5516         else
5517                 btrfs_node_key_to_cpu(buf, &key, 0);
5518
5519         btrfs_init_path(&path);
5520         path.lowest_level = level + 1;
5521         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5522         if (ret < 0)
5523                 return 0;
5524
5525         parent = path.nodes[level + 1];
5526         if (parent && buf->start == btrfs_node_blockptr(parent,
5527                                                         path.slots[level + 1]))
5528                 found = 1;
5529
5530         btrfs_release_path(&path);
5531         return found ? 0 : 1;
5532 }
5533
5534 static int is_extent_tree_record(struct extent_record *rec)
5535 {
5536         struct list_head *cur = rec->backrefs.next;
5537         struct extent_backref *node;
5538         struct tree_backref *back;
5539         int is_extent = 0;
5540
5541         while(cur != &rec->backrefs) {
5542                 node = to_extent_backref(cur);
5543                 cur = cur->next;
5544                 if (node->is_data)
5545                         return 0;
5546                 back = to_tree_backref(node);
5547                 if (node->full_backref)
5548                         return 0;
5549                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5550                         is_extent = 1;
5551         }
5552         return is_extent;
5553 }
5554
5555
5556 static int record_bad_block_io(struct btrfs_fs_info *info,
5557                                struct cache_tree *extent_cache,
5558                                u64 start, u64 len)
5559 {
5560         struct extent_record *rec;
5561         struct cache_extent *cache;
5562         struct btrfs_key key;
5563
5564         cache = lookup_cache_extent(extent_cache, start, len);
5565         if (!cache)
5566                 return 0;
5567
5568         rec = container_of(cache, struct extent_record, cache);
5569         if (!is_extent_tree_record(rec))
5570                 return 0;
5571
5572         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5573         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5574 }
5575
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * swapped; if @slot is 0 the node's new first key is pushed up through
 * btrfs_fixup_low_keys().
 *
 * For a leaf the item data, the item offsets/sizes and the keys are
 * exchanged as described inline.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer cannot be allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write each into the other slot. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        /* The first key of the node changed; fix up the levels above. */
                        struct btrfs_disk_key key;
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                /* Snapshot key, data offset and data size of both items. */
                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): item1_data holds item1_size bytes but is
                 * written out with item2_size (and item2_data with
                 * item1_size).  If the two sizes differ this appears to read
                 * past the end of the smaller temporary buffer -- confirm
                 * whether callers guarantee equal sizes.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Exchange offset and size in the two item headers. */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /*
                 * Exchange the keys.  path->slots[0] is repointed before each
                 * call and is left at slot + 1 on return.
                 */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
5643
5644 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5645 {
5646         struct extent_buffer *buf;
5647         struct btrfs_key k1, k2;
5648         int i;
5649         int level = path->lowest_level;
5650         int ret = -EIO;
5651
5652         buf = path->nodes[level];
5653         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5654                 if (level) {
5655                         btrfs_node_key_to_cpu(buf, &k1, i);
5656                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5657                 } else {
5658                         btrfs_item_key_to_cpu(buf, &k1, i);
5659                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5660                 }
5661                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5662                         continue;
5663                 ret = swap_values(root, path, buf, i);
5664                 if (ret)
5665                         break;
5666                 btrfs_mark_buffer_dirty(buf);
5667                 i = 0;
5668         }
5669         return ret;
5670 }
5671
5672 static int delete_bogus_item(struct btrfs_root *root,
5673                              struct btrfs_path *path,
5674                              struct extent_buffer *buf, int slot)
5675 {
5676         struct btrfs_key key;
5677         int nritems = btrfs_header_nritems(buf);
5678
5679         btrfs_item_key_to_cpu(buf, &key, slot);
5680
5681         /* These are all the keys we can deal with missing. */
5682         if (key.type != BTRFS_DIR_INDEX_KEY &&
5683             key.type != BTRFS_EXTENT_ITEM_KEY &&
5684             key.type != BTRFS_METADATA_ITEM_KEY &&
5685             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5686             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5687                 return -1;
5688
5689         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5690                (unsigned long long)key.objectid, key.type,
5691                (unsigned long long)key.offset, slot, buf->start);
5692         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5693                               btrfs_item_nr_offset(slot + 1),
5694                               sizeof(struct btrfs_item) *
5695                               (nritems - slot - 1));
5696         btrfs_set_header_nritems(buf, nritems - 1);
5697         if (slot == 0) {
5698                 struct btrfs_disk_key disk_key;
5699
5700                 btrfs_item_key(buf, &disk_key, 0);
5701                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5702         }
5703         btrfs_mark_buffer_dirty(buf);
5704         return 0;
5705 }
5706
5707 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5708 {
5709         struct extent_buffer *buf;
5710         int i;
5711         int ret = 0;
5712
5713         /* We should only get this for leaves */
5714         BUG_ON(path->lowest_level);
5715         buf = path->nodes[0];
5716 again:
5717         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5718                 unsigned int shift = 0, offset;
5719
5720                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5721                     BTRFS_LEAF_DATA_SIZE(root)) {
5722                         if (btrfs_item_end_nr(buf, i) >
5723                             BTRFS_LEAF_DATA_SIZE(root)) {
5724                                 ret = delete_bogus_item(root, path, buf, i);
5725                                 if (!ret)
5726                                         goto again;
5727                                 fprintf(stderr, "item is off the end of the "
5728                                         "leaf, can't fix\n");
5729                                 ret = -EIO;
5730                                 break;
5731                         }
5732                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5733                                 btrfs_item_end_nr(buf, i);
5734                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5735                            btrfs_item_offset_nr(buf, i - 1)) {
5736                         if (btrfs_item_end_nr(buf, i) >
5737                             btrfs_item_offset_nr(buf, i - 1)) {
5738                                 ret = delete_bogus_item(root, path, buf, i);
5739                                 if (!ret)
5740                                         goto again;
5741                                 fprintf(stderr, "items overlap, can't fix\n");
5742                                 ret = -EIO;
5743                                 break;
5744                         }
5745                         shift = btrfs_item_offset_nr(buf, i - 1) -
5746                                 btrfs_item_end_nr(buf, i);
5747                 }
5748                 if (!shift)
5749                         continue;
5750
5751                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5752                        i, shift, (unsigned long long)buf->start);
5753                 offset = btrfs_item_offset_nr(buf, i);
5754                 memmove_extent_buffer(buf,
5755                                       btrfs_leaf_data(buf) + offset + shift,
5756                                       btrfs_leaf_data(buf) + offset,
5757                                       btrfs_item_size_nr(buf, i));
5758                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5759                                       offset + shift);
5760                 btrfs_mark_buffer_dirty(buf);
5761         }
5762
5763         /*
5764          * We may have moved things, in which case we want to exit so we don't
5765          * write those changes out.  Once we have proper abort functionality in
5766          * progs this can be changed to something nicer.
5767          */
5768         BUG_ON(ret);
5769         return ret;
5770 }
5771
5772 /*
5773  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5774  * then just return -EIO.
5775  */
5776 static int try_to_fix_bad_block(struct btrfs_root *root,
5777                                 struct extent_buffer *buf,
5778                                 enum btrfs_tree_block_status status)
5779 {
5780         struct btrfs_trans_handle *trans;
5781         struct ulist *roots;
5782         struct ulist_node *node;
5783         struct btrfs_root *search_root;
5784         struct btrfs_path path;
5785         struct ulist_iterator iter;
5786         struct btrfs_key root_key, key;
5787         int ret;
5788
5789         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5790             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5791                 return -EIO;
5792
5793         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5794         if (ret)
5795                 return -EIO;
5796
5797         btrfs_init_path(&path);
5798         ULIST_ITER_INIT(&iter);
5799         while ((node = ulist_next(roots, &iter))) {
5800                 root_key.objectid = node->val;
5801                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5802                 root_key.offset = (u64)-1;
5803
5804                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5805                 if (IS_ERR(root)) {
5806                         ret = -EIO;
5807                         break;
5808                 }
5809
5810
5811                 trans = btrfs_start_transaction(search_root, 0);
5812                 if (IS_ERR(trans)) {
5813                         ret = PTR_ERR(trans);
5814                         break;
5815                 }
5816
5817                 path.lowest_level = btrfs_header_level(buf);
5818                 path.skip_check_block = 1;
5819                 if (path.lowest_level)
5820                         btrfs_node_key_to_cpu(buf, &key, 0);
5821                 else
5822                         btrfs_item_key_to_cpu(buf, &key, 0);
5823                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5824                 if (ret) {
5825                         ret = -EIO;
5826                         btrfs_commit_transaction(trans, search_root);
5827                         break;
5828                 }
5829                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5830                         ret = fix_key_order(search_root, &path);
5831                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5832                         ret = fix_item_offset(search_root, &path);
5833                 if (ret) {
5834                         btrfs_commit_transaction(trans, search_root);
5835                         break;
5836                 }
5837                 btrfs_release_path(&path);
5838                 btrfs_commit_transaction(trans, search_root);
5839         }
5840         ulist_free(roots);
5841         btrfs_release_path(&path);
5842         return ret;
5843 }
5844
5845 static int check_block(struct btrfs_root *root,
5846                        struct cache_tree *extent_cache,
5847                        struct extent_buffer *buf, u64 flags)
5848 {
5849         struct extent_record *rec;
5850         struct cache_extent *cache;
5851         struct btrfs_key key;
5852         enum btrfs_tree_block_status status;
5853         int ret = 0;
5854         int level;
5855
5856         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5857         if (!cache)
5858                 return 1;
5859         rec = container_of(cache, struct extent_record, cache);
5860         rec->generation = btrfs_header_generation(buf);
5861
5862         level = btrfs_header_level(buf);
5863         if (btrfs_header_nritems(buf) > 0) {
5864
5865                 if (level == 0)
5866                         btrfs_item_key_to_cpu(buf, &key, 0);
5867                 else
5868                         btrfs_node_key_to_cpu(buf, &key, 0);
5869
5870                 rec->info_objectid = key.objectid;
5871         }
5872         rec->info_level = level;
5873
5874         if (btrfs_is_leaf(buf))
5875                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5876         else
5877                 status = btrfs_check_node(root, &rec->parent_key, buf);
5878
5879         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5880                 if (repair)
5881                         status = try_to_fix_bad_block(root, buf, status);
5882                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5883                         ret = -EIO;
5884                         fprintf(stderr, "bad block %llu\n",
5885                                 (unsigned long long)buf->start);
5886                 } else {
5887                         /*
5888                          * Signal to callers we need to start the scan over
5889                          * again since we'll have cowed blocks.
5890                          */
5891                         ret = -EAGAIN;
5892                 }
5893         } else {
5894                 rec->content_checked = 1;
5895                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5896                         rec->owner_ref_checked = 1;
5897                 else {
5898                         ret = check_owner_ref(root, rec, buf);
5899                         if (!ret)
5900                                 rec->owner_ref_checked = 1;
5901                 }
5902         }
5903         if (!ret)
5904                 maybe_free_extent_rec(extent_cache, rec);
5905         return ret;
5906 }
5907
5908 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5909                                                 u64 parent, u64 root)
5910 {
5911         struct list_head *cur = rec->backrefs.next;
5912         struct extent_backref *node;
5913         struct tree_backref *back;
5914
5915         while(cur != &rec->backrefs) {
5916                 node = to_extent_backref(cur);
5917                 cur = cur->next;
5918                 if (node->is_data)
5919                         continue;
5920                 back = to_tree_backref(node);
5921                 if (parent > 0) {
5922                         if (!node->full_backref)
5923                                 continue;
5924                         if (parent == back->parent)
5925                                 return back;
5926                 } else {
5927                         if (node->full_backref)
5928                                 continue;
5929                         if (back->root == root)
5930                                 return back;
5931                 }
5932         }
5933         return NULL;
5934 }
5935
5936 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5937                                                 u64 parent, u64 root)
5938 {
5939         struct tree_backref *ref = malloc(sizeof(*ref));
5940
5941         if (!ref)
5942                 return NULL;
5943         memset(&ref->node, 0, sizeof(ref->node));
5944         if (parent > 0) {
5945                 ref->parent = parent;
5946                 ref->node.full_backref = 1;
5947         } else {
5948                 ref->root = root;
5949                 ref->node.full_backref = 0;
5950         }
5951         list_add_tail(&ref->node.list, &rec->backrefs);
5952
5953         return ref;
5954 }
5955
5956 static struct data_backref *find_data_backref(struct extent_record *rec,
5957                                                 u64 parent, u64 root,
5958                                                 u64 owner, u64 offset,
5959                                                 int found_ref,
5960                                                 u64 disk_bytenr, u64 bytes)
5961 {
5962         struct list_head *cur = rec->backrefs.next;
5963         struct extent_backref *node;
5964         struct data_backref *back;
5965
5966         while(cur != &rec->backrefs) {
5967                 node = to_extent_backref(cur);
5968                 cur = cur->next;
5969                 if (!node->is_data)
5970                         continue;
5971                 back = to_data_backref(node);
5972                 if (parent > 0) {
5973                         if (!node->full_backref)
5974                                 continue;
5975                         if (parent == back->parent)
5976                                 return back;
5977                 } else {
5978                         if (node->full_backref)
5979                                 continue;
5980                         if (back->root == root && back->owner == owner &&
5981                             back->offset == offset) {
5982                                 if (found_ref && node->found_ref &&
5983                                     (back->bytes != bytes ||
5984                                     back->disk_bytenr != disk_bytenr))
5985                                         continue;
5986                                 return back;
5987                         }
5988                 }
5989         }
5990         return NULL;
5991 }
5992
5993 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5994                                                 u64 parent, u64 root,
5995                                                 u64 owner, u64 offset,
5996                                                 u64 max_size)
5997 {
5998         struct data_backref *ref = malloc(sizeof(*ref));
5999
6000         if (!ref)
6001                 return NULL;
6002         memset(&ref->node, 0, sizeof(ref->node));
6003         ref->node.is_data = 1;
6004
6005         if (parent > 0) {
6006                 ref->parent = parent;
6007                 ref->owner = 0;
6008                 ref->offset = 0;
6009                 ref->node.full_backref = 1;
6010         } else {
6011                 ref->root = root;
6012                 ref->owner = owner;
6013                 ref->offset = offset;
6014                 ref->node.full_backref = 0;
6015         }
6016         ref->bytes = max_size;
6017         ref->found_ref = 0;
6018         ref->num_refs = 0;
6019         list_add_tail(&ref->node.list, &rec->backrefs);
6020         if (max_size > rec->max_size)
6021                 rec->max_size = max_size;
6022         return ref;
6023 }
6024
6025 /* Check if the type of extent matches with its chunk */
6026 static void check_extent_type(struct extent_record *rec)
6027 {
6028         struct btrfs_block_group_cache *bg_cache;
6029
6030         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6031         if (!bg_cache)
6032                 return;
6033
6034         /* data extent, check chunk directly*/
6035         if (!rec->metadata) {
6036                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6037                         rec->wrong_chunk_type = 1;
6038                 return;
6039         }
6040
6041         /* metadata extent, check the obvious case first */
6042         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6043                                  BTRFS_BLOCK_GROUP_METADATA))) {
6044                 rec->wrong_chunk_type = 1;
6045                 return;
6046         }
6047
6048         /*
6049          * Check SYSTEM extent, as it's also marked as metadata, we can only
6050          * make sure it's a SYSTEM extent by its backref
6051          */
6052         if (!list_empty(&rec->backrefs)) {
6053                 struct extent_backref *node;
6054                 struct tree_backref *tback;
6055                 u64 bg_type;
6056
6057                 node = to_extent_backref(rec->backrefs.next);
6058                 if (node->is_data) {
6059                         /* tree block shouldn't have data backref */
6060                         rec->wrong_chunk_type = 1;
6061                         return;
6062                 }
6063                 tback = container_of(node, struct tree_backref, node);
6064
6065                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6066                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6067                 else
6068                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6069                 if (!(bg_cache->flags & bg_type))
6070                         rec->wrong_chunk_type = 1;
6071         }
6072 }
6073
6074 /*
6075  * Allocate a new extent record, fill default values from @tmpl and insert int
6076  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6077  * the cache, otherwise it fails.
6078  */
6079 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6080                 struct extent_record *tmpl)
6081 {
6082         struct extent_record *rec;
6083         int ret = 0;
6084
6085         BUG_ON(tmpl->max_size == 0);
6086         rec = malloc(sizeof(*rec));
6087         if (!rec)
6088                 return -ENOMEM;
6089         rec->start = tmpl->start;
6090         rec->max_size = tmpl->max_size;
6091         rec->nr = max(tmpl->nr, tmpl->max_size);
6092         rec->found_rec = tmpl->found_rec;
6093         rec->content_checked = tmpl->content_checked;
6094         rec->owner_ref_checked = tmpl->owner_ref_checked;
6095         rec->num_duplicates = 0;
6096         rec->metadata = tmpl->metadata;
6097         rec->flag_block_full_backref = FLAG_UNSET;
6098         rec->bad_full_backref = 0;
6099         rec->crossing_stripes = 0;
6100         rec->wrong_chunk_type = 0;
6101         rec->is_root = tmpl->is_root;
6102         rec->refs = tmpl->refs;
6103         rec->extent_item_refs = tmpl->extent_item_refs;
6104         rec->parent_generation = tmpl->parent_generation;
6105         INIT_LIST_HEAD(&rec->backrefs);
6106         INIT_LIST_HEAD(&rec->dups);
6107         INIT_LIST_HEAD(&rec->list);
6108         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6109         rec->cache.start = tmpl->start;
6110         rec->cache.size = tmpl->nr;
6111         ret = insert_cache_extent(extent_cache, &rec->cache);
6112         if (ret) {
6113                 free(rec);
6114                 return ret;
6115         }
6116         bytes_used += rec->nr;
6117
6118         if (tmpl->metadata)
6119                 rec->crossing_stripes = check_crossing_stripes(global_info,
6120                                 rec->start, global_info->nodesize);
6121         check_extent_type(rec);
6122         return ret;
6123 }
6124
6125 /*
6126  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6127  * some are hints:
6128  * - refs              - if found, increase refs
6129  * - is_root           - if found, set
6130  * - content_checked   - if found, set
6131  * - owner_ref_checked - if found, set
6132  *
6133  * If not found, create a new one, initialize and insert.
6134  */
6135 static int add_extent_rec(struct cache_tree *extent_cache,
6136                 struct extent_record *tmpl)
6137 {
6138         struct extent_record *rec;
6139         struct cache_extent *cache;
6140         int ret = 0;
6141         int dup = 0;
6142
6143         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6144         if (cache) {
6145                 rec = container_of(cache, struct extent_record, cache);
6146                 if (tmpl->refs)
6147                         rec->refs++;
6148                 if (rec->nr == 1)
6149                         rec->nr = max(tmpl->nr, tmpl->max_size);
6150
6151                 /*
6152                  * We need to make sure to reset nr to whatever the extent
6153                  * record says was the real size, this way we can compare it to
6154                  * the backrefs.
6155                  */
6156                 if (tmpl->found_rec) {
6157                         if (tmpl->start != rec->start || rec->found_rec) {
6158                                 struct extent_record *tmp;
6159
6160                                 dup = 1;
6161                                 if (list_empty(&rec->list))
6162                                         list_add_tail(&rec->list,
6163                                                       &duplicate_extents);
6164
6165                                 /*
6166                                  * We have to do this song and dance in case we
6167                                  * find an extent record that falls inside of
6168                                  * our current extent record but does not have
6169                                  * the same objectid.
6170                                  */
6171                                 tmp = malloc(sizeof(*tmp));
6172                                 if (!tmp)
6173                                         return -ENOMEM;
6174                                 tmp->start = tmpl->start;
6175                                 tmp->max_size = tmpl->max_size;
6176                                 tmp->nr = tmpl->nr;
6177                                 tmp->found_rec = 1;
6178                                 tmp->metadata = tmpl->metadata;
6179                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6180                                 INIT_LIST_HEAD(&tmp->list);
6181                                 list_add_tail(&tmp->list, &rec->dups);
6182                                 rec->num_duplicates++;
6183                         } else {
6184                                 rec->nr = tmpl->nr;
6185                                 rec->found_rec = 1;
6186                         }
6187                 }
6188
6189                 if (tmpl->extent_item_refs && !dup) {
6190                         if (rec->extent_item_refs) {
6191                                 fprintf(stderr, "block %llu rec "
6192                                         "extent_item_refs %llu, passed %llu\n",
6193                                         (unsigned long long)tmpl->start,
6194                                         (unsigned long long)
6195                                                         rec->extent_item_refs,
6196                                         (unsigned long long)tmpl->extent_item_refs);
6197                         }
6198                         rec->extent_item_refs = tmpl->extent_item_refs;
6199                 }
6200                 if (tmpl->is_root)
6201                         rec->is_root = 1;
6202                 if (tmpl->content_checked)
6203                         rec->content_checked = 1;
6204                 if (tmpl->owner_ref_checked)
6205                         rec->owner_ref_checked = 1;
6206                 memcpy(&rec->parent_key, &tmpl->parent_key,
6207                                 sizeof(tmpl->parent_key));
6208                 if (tmpl->parent_generation)
6209                         rec->parent_generation = tmpl->parent_generation;
6210                 if (rec->max_size < tmpl->max_size)
6211                         rec->max_size = tmpl->max_size;
6212
6213                 /*
6214                  * A metadata extent can't cross stripe_len boundary, otherwise
6215                  * kernel scrub won't be able to handle it.
6216                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6217                  * it.
6218                  */
6219                 if (tmpl->metadata)
6220                         rec->crossing_stripes = check_crossing_stripes(
6221                                         global_info, rec->start,
6222                                         global_info->nodesize);
6223                 check_extent_type(rec);
6224                 maybe_free_extent_rec(extent_cache, rec);
6225                 return ret;
6226         }
6227
6228         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6229
6230         return ret;
6231 }
6232
6233 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6234                             u64 parent, u64 root, int found_ref)
6235 {
6236         struct extent_record *rec;
6237         struct tree_backref *back;
6238         struct cache_extent *cache;
6239         int ret;
6240
6241         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6242         if (!cache) {
6243                 struct extent_record tmpl;
6244
6245                 memset(&tmpl, 0, sizeof(tmpl));
6246                 tmpl.start = bytenr;
6247                 tmpl.nr = 1;
6248                 tmpl.metadata = 1;
6249                 tmpl.max_size = 1;
6250
6251                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6252                 if (ret)
6253                         return ret;
6254
6255                 /* really a bug in cache_extent implement now */
6256                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6257                 if (!cache)
6258                         return -ENOENT;
6259         }
6260
6261         rec = container_of(cache, struct extent_record, cache);
6262         if (rec->start != bytenr) {
6263                 /*
6264                  * Several cause, from unaligned bytenr to over lapping extents
6265                  */
6266                 return -EEXIST;
6267         }
6268
6269         back = find_tree_backref(rec, parent, root);
6270         if (!back) {
6271                 back = alloc_tree_backref(rec, parent, root);
6272                 if (!back)
6273                         return -ENOMEM;
6274         }
6275
6276         if (found_ref) {
6277                 if (back->node.found_ref) {
6278                         fprintf(stderr, "Extent back ref already exists "
6279                                 "for %llu parent %llu root %llu \n",
6280                                 (unsigned long long)bytenr,
6281                                 (unsigned long long)parent,
6282                                 (unsigned long long)root);
6283                 }
6284                 back->node.found_ref = 1;
6285         } else {
6286                 if (back->node.found_extent_tree) {
6287                         fprintf(stderr, "Extent back ref already exists "
6288                                 "for %llu parent %llu root %llu \n",
6289                                 (unsigned long long)bytenr,
6290                                 (unsigned long long)parent,
6291                                 (unsigned long long)root);
6292                 }
6293                 back->node.found_extent_tree = 1;
6294         }
6295         check_extent_type(rec);
6296         maybe_free_extent_rec(extent_cache, rec);
6297         return 0;
6298 }
6299
6300 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6301                             u64 parent, u64 root, u64 owner, u64 offset,
6302                             u32 num_refs, int found_ref, u64 max_size)
6303 {
6304         struct extent_record *rec;
6305         struct data_backref *back;
6306         struct cache_extent *cache;
6307         int ret;
6308
6309         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6310         if (!cache) {
6311                 struct extent_record tmpl;
6312
6313                 memset(&tmpl, 0, sizeof(tmpl));
6314                 tmpl.start = bytenr;
6315                 tmpl.nr = 1;
6316                 tmpl.max_size = max_size;
6317
6318                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6319                 if (ret)
6320                         return ret;
6321
6322                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6323                 if (!cache)
6324                         abort();
6325         }
6326
6327         rec = container_of(cache, struct extent_record, cache);
6328         if (rec->max_size < max_size)
6329                 rec->max_size = max_size;
6330
6331         /*
6332          * If found_ref is set then max_size is the real size and must match the
6333          * existing refs.  So if we have already found a ref then we need to
6334          * make sure that this ref matches the existing one, otherwise we need
6335          * to add a new backref so we can notice that the backrefs don't match
6336          * and we need to figure out who is telling the truth.  This is to
6337          * account for that awful fsync bug I introduced where we'd end up with
6338          * a btrfs_file_extent_item that would have its length include multiple
6339          * prealloc extents or point inside of a prealloc extent.
6340          */
6341         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6342                                  bytenr, max_size);
6343         if (!back) {
6344                 back = alloc_data_backref(rec, parent, root, owner, offset,
6345                                           max_size);
6346                 BUG_ON(!back);
6347         }
6348
6349         if (found_ref) {
6350                 BUG_ON(num_refs != 1);
6351                 if (back->node.found_ref)
6352                         BUG_ON(back->bytes != max_size);
6353                 back->node.found_ref = 1;
6354                 back->found_ref += 1;
6355                 back->bytes = max_size;
6356                 back->disk_bytenr = bytenr;
6357                 rec->refs += 1;
6358                 rec->content_checked = 1;
6359                 rec->owner_ref_checked = 1;
6360         } else {
6361                 if (back->node.found_extent_tree) {
6362                         fprintf(stderr, "Extent back ref already exists "
6363                                 "for %llu parent %llu root %llu "
6364                                 "owner %llu offset %llu num_refs %lu\n",
6365                                 (unsigned long long)bytenr,
6366                                 (unsigned long long)parent,
6367                                 (unsigned long long)root,
6368                                 (unsigned long long)owner,
6369                                 (unsigned long long)offset,
6370                                 (unsigned long)num_refs);
6371                 }
6372                 back->num_refs = num_refs;
6373                 back->node.found_extent_tree = 1;
6374         }
6375         maybe_free_extent_rec(extent_cache, rec);
6376         return 0;
6377 }
6378
6379 static int add_pending(struct cache_tree *pending,
6380                        struct cache_tree *seen, u64 bytenr, u32 size)
6381 {
6382         int ret;
6383         ret = add_cache_extent(seen, bytenr, size);
6384         if (ret)
6385                 return ret;
6386         add_cache_extent(pending, bytenr, size);
6387         return 0;
6388 }
6389
6390 static int pick_next_pending(struct cache_tree *pending,
6391                         struct cache_tree *reada,
6392                         struct cache_tree *nodes,
6393                         u64 last, struct block_info *bits, int bits_nr,
6394                         int *reada_bits)
6395 {
6396         unsigned long node_start = last;
6397         struct cache_extent *cache;
6398         int ret;
6399
6400         cache = search_cache_extent(reada, 0);
6401         if (cache) {
6402                 bits[0].start = cache->start;
6403                 bits[0].size = cache->size;
6404                 *reada_bits = 1;
6405                 return 1;
6406         }
6407         *reada_bits = 0;
6408         if (node_start > 32768)
6409                 node_start -= 32768;
6410
6411         cache = search_cache_extent(nodes, node_start);
6412         if (!cache)
6413                 cache = search_cache_extent(nodes, 0);
6414
6415         if (!cache) {
6416                  cache = search_cache_extent(pending, 0);
6417                  if (!cache)
6418                          return 0;
6419                  ret = 0;
6420                  do {
6421                          bits[ret].start = cache->start;
6422                          bits[ret].size = cache->size;
6423                          cache = next_cache_extent(cache);
6424                          ret++;
6425                  } while (cache && ret < bits_nr);
6426                  return ret;
6427         }
6428
6429         ret = 0;
6430         do {
6431                 bits[ret].start = cache->start;
6432                 bits[ret].size = cache->size;
6433                 cache = next_cache_extent(cache);
6434                 ret++;
6435         } while (cache && ret < bits_nr);
6436
6437         if (bits_nr - ret > 8) {
6438                 u64 lookup = bits[0].start + bits[0].size;
6439                 struct cache_extent *next;
6440                 next = search_cache_extent(pending, lookup);
6441                 while(next) {
6442                         if (next->start - lookup > 32768)
6443                                 break;
6444                         bits[ret].start = next->start;
6445                         bits[ret].size = next->size;
6446                         lookup = next->start + next->size;
6447                         ret++;
6448                         if (ret == bits_nr)
6449                                 break;
6450                         next = next_cache_extent(next);
6451                         if (!next)
6452                                 break;
6453                 }
6454         }
6455         return ret;
6456 }
6457
6458 static void free_chunk_record(struct cache_extent *cache)
6459 {
6460         struct chunk_record *rec;
6461
6462         rec = container_of(cache, struct chunk_record, cache);
6463         list_del_init(&rec->list);
6464         list_del_init(&rec->dextents);
6465         free(rec);
6466 }
6467
6468 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6469 {
6470         cache_tree_free_extents(chunk_cache, free_chunk_record);
6471 }
6472
6473 static void free_device_record(struct rb_node *node)
6474 {
6475         struct device_record *rec;
6476
6477         rec = container_of(node, struct device_record, node);
6478         free(rec);
6479 }
6480
6481 FREE_RB_BASED_TREE(device_cache, free_device_record);
6482
6483 int insert_block_group_record(struct block_group_tree *tree,
6484                               struct block_group_record *bg_rec)
6485 {
6486         int ret;
6487
6488         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6489         if (ret)
6490                 return ret;
6491
6492         list_add_tail(&bg_rec->list, &tree->block_groups);
6493         return 0;
6494 }
6495
6496 static void free_block_group_record(struct cache_extent *cache)
6497 {
6498         struct block_group_record *rec;
6499
6500         rec = container_of(cache, struct block_group_record, cache);
6501         list_del_init(&rec->list);
6502         free(rec);
6503 }
6504
6505 void free_block_group_tree(struct block_group_tree *tree)
6506 {
6507         cache_tree_free_extents(&tree->tree, free_block_group_record);
6508 }
6509
6510 int insert_device_extent_record(struct device_extent_tree *tree,
6511                                 struct device_extent_record *de_rec)
6512 {
6513         int ret;
6514
6515         /*
6516          * Device extent is a bit different from the other extents, because
6517          * the extents which belong to the different devices may have the
6518          * same start and size, so we need use the special extent cache
6519          * search/insert functions.
6520          */
6521         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6522         if (ret)
6523                 return ret;
6524
6525         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6526         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6527         return 0;
6528 }
6529
6530 static void free_device_extent_record(struct cache_extent *cache)
6531 {
6532         struct device_extent_record *rec;
6533
6534         rec = container_of(cache, struct device_extent_record, cache);
6535         if (!list_empty(&rec->chunk_list))
6536                 list_del_init(&rec->chunk_list);
6537         if (!list_empty(&rec->device_list))
6538                 list_del_init(&rec->device_list);
6539         free(rec);
6540 }
6541
6542 void free_device_extent_tree(struct device_extent_tree *tree)
6543 {
6544         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6545 }
6546
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref on the extent
 * record for key.objectid, with key.offset passed as the parent.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref; anything else is recorded as a data backref carrying the v0 ref
 * count.  Returns the result of the backref helper that was called.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
6567
6568 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6569                                             struct btrfs_key *key,
6570                                             int slot)
6571 {
6572         struct btrfs_chunk *ptr;
6573         struct chunk_record *rec;
6574         int num_stripes, i;
6575
6576         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6577         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6578
6579         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6580         if (!rec) {
6581                 fprintf(stderr, "memory allocation failed\n");
6582                 exit(-1);
6583         }
6584
6585         INIT_LIST_HEAD(&rec->list);
6586         INIT_LIST_HEAD(&rec->dextents);
6587         rec->bg_rec = NULL;
6588
6589         rec->cache.start = key->offset;
6590         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6591
6592         rec->generation = btrfs_header_generation(leaf);
6593
6594         rec->objectid = key->objectid;
6595         rec->type = key->type;
6596         rec->offset = key->offset;
6597
6598         rec->length = rec->cache.size;
6599         rec->owner = btrfs_chunk_owner(leaf, ptr);
6600         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6601         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6602         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6603         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6604         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6605         rec->num_stripes = num_stripes;
6606         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6607
6608         for (i = 0; i < rec->num_stripes; ++i) {
6609                 rec->stripes[i].devid =
6610                         btrfs_stripe_devid_nr(leaf, ptr, i);
6611                 rec->stripes[i].offset =
6612                         btrfs_stripe_offset_nr(leaf, ptr, i);
6613                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6614                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6615                                 BTRFS_UUID_SIZE);
6616         }
6617
6618         return rec;
6619 }
6620
6621 static int process_chunk_item(struct cache_tree *chunk_cache,
6622                               struct btrfs_key *key, struct extent_buffer *eb,
6623                               int slot)
6624 {
6625         struct chunk_record *rec;
6626         struct btrfs_chunk *chunk;
6627         int ret = 0;
6628
6629         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6630         /*
6631          * Do extra check for this chunk item,
6632          *
6633          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6634          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6635          * and owner<->key_type check.
6636          */
6637         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6638                                       key->offset);
6639         if (ret < 0) {
6640                 error("chunk(%llu, %llu) is not valid, ignore it",
6641                       key->offset, btrfs_chunk_length(eb, chunk));
6642                 return 0;
6643         }
6644         rec = btrfs_new_chunk_record(eb, key, slot);
6645         ret = insert_cache_extent(chunk_cache, &rec->cache);
6646         if (ret) {
6647                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6648                         rec->offset, rec->length);
6649                 free(rec);
6650         }
6651
6652         return ret;
6653 }
6654
6655 static int process_device_item(struct rb_root *dev_cache,
6656                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6657 {
6658         struct btrfs_dev_item *ptr;
6659         struct device_record *rec;
6660         int ret = 0;
6661
6662         ptr = btrfs_item_ptr(eb,
6663                 slot, struct btrfs_dev_item);
6664
6665         rec = malloc(sizeof(*rec));
6666         if (!rec) {
6667                 fprintf(stderr, "memory allocation failed\n");
6668                 return -ENOMEM;
6669         }
6670
6671         rec->devid = key->offset;
6672         rec->generation = btrfs_header_generation(eb);
6673
6674         rec->objectid = key->objectid;
6675         rec->type = key->type;
6676         rec->offset = key->offset;
6677
6678         rec->devid = btrfs_device_id(eb, ptr);
6679         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6680         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6681
6682         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6683         if (ret) {
6684                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6685                 free(rec);
6686         }
6687
6688         return ret;
6689 }
6690
6691 struct block_group_record *
6692 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6693                              int slot)
6694 {
6695         struct btrfs_block_group_item *ptr;
6696         struct block_group_record *rec;
6697
6698         rec = calloc(1, sizeof(*rec));
6699         if (!rec) {
6700                 fprintf(stderr, "memory allocation failed\n");
6701                 exit(-1);
6702         }
6703
6704         rec->cache.start = key->objectid;
6705         rec->cache.size = key->offset;
6706
6707         rec->generation = btrfs_header_generation(leaf);
6708
6709         rec->objectid = key->objectid;
6710         rec->type = key->type;
6711         rec->offset = key->offset;
6712
6713         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6714         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6715
6716         INIT_LIST_HEAD(&rec->list);
6717
6718         return rec;
6719 }
6720
6721 static int process_block_group_item(struct block_group_tree *block_group_cache,
6722                                     struct btrfs_key *key,
6723                                     struct extent_buffer *eb, int slot)
6724 {
6725         struct block_group_record *rec;
6726         int ret = 0;
6727
6728         rec = btrfs_new_block_group_record(eb, key, slot);
6729         ret = insert_block_group_record(block_group_cache, rec);
6730         if (ret) {
6731                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6732                         rec->objectid, rec->offset);
6733                 free(rec);
6734         }
6735
6736         return ret;
6737 }
6738
6739 struct device_extent_record *
6740 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6741                                struct btrfs_key *key, int slot)
6742 {
6743         struct device_extent_record *rec;
6744         struct btrfs_dev_extent *ptr;
6745
6746         rec = calloc(1, sizeof(*rec));
6747         if (!rec) {
6748                 fprintf(stderr, "memory allocation failed\n");
6749                 exit(-1);
6750         }
6751
6752         rec->cache.objectid = key->objectid;
6753         rec->cache.start = key->offset;
6754
6755         rec->generation = btrfs_header_generation(leaf);
6756
6757         rec->objectid = key->objectid;
6758         rec->type = key->type;
6759         rec->offset = key->offset;
6760
6761         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6762         rec->chunk_objecteid =
6763                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6764         rec->chunk_offset =
6765                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6766         rec->length = btrfs_dev_extent_length(leaf, ptr);
6767         rec->cache.size = rec->length;
6768
6769         INIT_LIST_HEAD(&rec->chunk_list);
6770         INIT_LIST_HEAD(&rec->device_list);
6771
6772         return rec;
6773 }
6774
6775 static int
6776 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6777                            struct btrfs_key *key, struct extent_buffer *eb,
6778                            int slot)
6779 {
6780         struct device_extent_record *rec;
6781         int ret;
6782
6783         rec = btrfs_new_device_extent_record(eb, key, slot);
6784         ret = insert_device_extent_record(dev_extent_cache, rec);
6785         if (ret) {
6786                 fprintf(stderr,
6787                         "Device extent[%llu, %llu, %llu] existed.\n",
6788                         rec->objectid, rec->offset, rec->length);
6789                 free(rec);
6790         }
6791
6792         return ret;
6793 }
6794
6795 static int process_extent_item(struct btrfs_root *root,
6796                                struct cache_tree *extent_cache,
6797                                struct extent_buffer *eb, int slot)
6798 {
6799         struct btrfs_extent_item *ei;
6800         struct btrfs_extent_inline_ref *iref;
6801         struct btrfs_extent_data_ref *dref;
6802         struct btrfs_shared_data_ref *sref;
6803         struct btrfs_key key;
6804         struct extent_record tmpl;
6805         unsigned long end;
6806         unsigned long ptr;
6807         int ret;
6808         int type;
6809         u32 item_size = btrfs_item_size_nr(eb, slot);
6810         u64 refs = 0;
6811         u64 offset;
6812         u64 num_bytes;
6813         int metadata = 0;
6814
6815         btrfs_item_key_to_cpu(eb, &key, slot);
6816
6817         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6818                 metadata = 1;
6819                 num_bytes = root->fs_info->nodesize;
6820         } else {
6821                 num_bytes = key.offset;
6822         }
6823
6824         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6825                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6826                       key.objectid, root->fs_info->sectorsize);
6827                 return -EIO;
6828         }
6829         if (item_size < sizeof(*ei)) {
6830 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6831                 struct btrfs_extent_item_v0 *ei0;
6832                 BUG_ON(item_size != sizeof(*ei0));
6833                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6834                 refs = btrfs_extent_refs_v0(eb, ei0);
6835 #else
6836                 BUG();
6837 #endif
6838                 memset(&tmpl, 0, sizeof(tmpl));
6839                 tmpl.start = key.objectid;
6840                 tmpl.nr = num_bytes;
6841                 tmpl.extent_item_refs = refs;
6842                 tmpl.metadata = metadata;
6843                 tmpl.found_rec = 1;
6844                 tmpl.max_size = num_bytes;
6845
6846                 return add_extent_rec(extent_cache, &tmpl);
6847         }
6848
6849         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6850         refs = btrfs_extent_refs(eb, ei);
6851         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6852                 metadata = 1;
6853         else
6854                 metadata = 0;
6855         if (metadata && num_bytes != root->fs_info->nodesize) {
6856                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6857                       num_bytes, root->fs_info->nodesize);
6858                 return -EIO;
6859         }
6860         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6861                 error("ignore invalid data extent, length %llu is not aligned to %u",
6862                       num_bytes, root->fs_info->sectorsize);
6863                 return -EIO;
6864         }
6865
6866         memset(&tmpl, 0, sizeof(tmpl));
6867         tmpl.start = key.objectid;
6868         tmpl.nr = num_bytes;
6869         tmpl.extent_item_refs = refs;
6870         tmpl.metadata = metadata;
6871         tmpl.found_rec = 1;
6872         tmpl.max_size = num_bytes;
6873         add_extent_rec(extent_cache, &tmpl);
6874
6875         ptr = (unsigned long)(ei + 1);
6876         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6877             key.type == BTRFS_EXTENT_ITEM_KEY)
6878                 ptr += sizeof(struct btrfs_tree_block_info);
6879
6880         end = (unsigned long)ei + item_size;
6881         while (ptr < end) {
6882                 iref = (struct btrfs_extent_inline_ref *)ptr;
6883                 type = btrfs_extent_inline_ref_type(eb, iref);
6884                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6885                 switch (type) {
6886                 case BTRFS_TREE_BLOCK_REF_KEY:
6887                         ret = add_tree_backref(extent_cache, key.objectid,
6888                                         0, offset, 0);
6889                         if (ret < 0)
6890                                 error(
6891                         "add_tree_backref failed (extent items tree block): %s",
6892                                       strerror(-ret));
6893                         break;
6894                 case BTRFS_SHARED_BLOCK_REF_KEY:
6895                         ret = add_tree_backref(extent_cache, key.objectid,
6896                                         offset, 0, 0);
6897                         if (ret < 0)
6898                                 error(
6899                         "add_tree_backref failed (extent items shared block): %s",
6900                                       strerror(-ret));
6901                         break;
6902                 case BTRFS_EXTENT_DATA_REF_KEY:
6903                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6904                         add_data_backref(extent_cache, key.objectid, 0,
6905                                         btrfs_extent_data_ref_root(eb, dref),
6906                                         btrfs_extent_data_ref_objectid(eb,
6907                                                                        dref),
6908                                         btrfs_extent_data_ref_offset(eb, dref),
6909                                         btrfs_extent_data_ref_count(eb, dref),
6910                                         0, num_bytes);
6911                         break;
6912                 case BTRFS_SHARED_DATA_REF_KEY:
6913                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6914                         add_data_backref(extent_cache, key.objectid, offset,
6915                                         0, 0, 0,
6916                                         btrfs_shared_data_ref_count(eb, sref),
6917                                         0, num_bytes);
6918                         break;
6919                 default:
6920                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6921                                 key.objectid, key.type, num_bytes);
6922                         goto out;
6923                 }
6924                 ptr += btrfs_extent_inline_ref_size(type);
6925         }
6926         WARN_ON(ptr > end);
6927 out:
6928         return 0;
6929 }
6930
6931 static int check_cache_range(struct btrfs_root *root,
6932                              struct btrfs_block_group_cache *cache,
6933                              u64 offset, u64 bytes)
6934 {
6935         struct btrfs_free_space *entry;
6936         u64 *logical;
6937         u64 bytenr;
6938         int stripe_len;
6939         int i, nr, ret;
6940
6941         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6942                 bytenr = btrfs_sb_offset(i);
6943                 ret = btrfs_rmap_block(root->fs_info,
6944                                        cache->key.objectid, bytenr, 0,
6945                                        &logical, &nr, &stripe_len);
6946                 if (ret)
6947                         return ret;
6948
6949                 while (nr--) {
6950                         if (logical[nr] + stripe_len <= offset)
6951                                 continue;
6952                         if (offset + bytes <= logical[nr])
6953                                 continue;
6954                         if (logical[nr] == offset) {
6955                                 if (stripe_len >= bytes) {
6956                                         free(logical);
6957                                         return 0;
6958                                 }
6959                                 bytes -= stripe_len;
6960                                 offset += stripe_len;
6961                         } else if (logical[nr] < offset) {
6962                                 if (logical[nr] + stripe_len >=
6963                                     offset + bytes) {
6964                                         free(logical);
6965                                         return 0;
6966                                 }
6967                                 bytes = (offset + bytes) -
6968                                         (logical[nr] + stripe_len);
6969                                 offset = logical[nr] + stripe_len;
6970                         } else {
6971                                 /*
6972                                  * Could be tricky, the super may land in the
6973                                  * middle of the area we're checking.  First
6974                                  * check the easiest case, it's at the end.
6975                                  */
6976                                 if (logical[nr] + stripe_len >=
6977                                     bytes + offset) {
6978                                         bytes = logical[nr] - offset;
6979                                         continue;
6980                                 }
6981
6982                                 /* Check the left side */
6983                                 ret = check_cache_range(root, cache,
6984                                                         offset,
6985                                                         logical[nr] - offset);
6986                                 if (ret) {
6987                                         free(logical);
6988                                         return ret;
6989                                 }
6990
6991                                 /* Now we continue with the right side */
6992                                 bytes = (offset + bytes) -
6993                                         (logical[nr] + stripe_len);
6994                                 offset = logical[nr] + stripe_len;
6995                         }
6996                 }
6997
6998                 free(logical);
6999         }
7000
7001         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7002         if (!entry) {
7003                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7004                         offset, offset+bytes);
7005                 return -EINVAL;
7006         }
7007
7008         if (entry->offset != offset) {
7009                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7010                         entry->offset);
7011                 return -EINVAL;
7012         }
7013
7014         if (entry->bytes != bytes) {
7015                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7016                         bytes, entry->bytes, offset);
7017                 return -EINVAL;
7018         }
7019
7020         unlink_free_space(cache->free_space_ctl, entry);
7021         free(entry);
7022         return 0;
7023 }
7024
7025 static int verify_space_cache(struct btrfs_root *root,
7026                               struct btrfs_block_group_cache *cache)
7027 {
7028         struct btrfs_path path;
7029         struct extent_buffer *leaf;
7030         struct btrfs_key key;
7031         u64 last;
7032         int ret = 0;
7033
7034         root = root->fs_info->extent_root;
7035
7036         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7037
7038         btrfs_init_path(&path);
7039         key.objectid = last;
7040         key.offset = 0;
7041         key.type = BTRFS_EXTENT_ITEM_KEY;
7042         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7043         if (ret < 0)
7044                 goto out;
7045         ret = 0;
7046         while (1) {
7047                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7048                         ret = btrfs_next_leaf(root, &path);
7049                         if (ret < 0)
7050                                 goto out;
7051                         if (ret > 0) {
7052                                 ret = 0;
7053                                 break;
7054                         }
7055                 }
7056                 leaf = path.nodes[0];
7057                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7058                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7059                         break;
7060                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7061                     key.type != BTRFS_METADATA_ITEM_KEY) {
7062                         path.slots[0]++;
7063                         continue;
7064                 }
7065
7066                 if (last == key.objectid) {
7067                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7068                                 last = key.objectid + key.offset;
7069                         else
7070                                 last = key.objectid + root->fs_info->nodesize;
7071                         path.slots[0]++;
7072                         continue;
7073                 }
7074
7075                 ret = check_cache_range(root, cache, last,
7076                                         key.objectid - last);
7077                 if (ret)
7078                         break;
7079                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7080                         last = key.objectid + key.offset;
7081                 else
7082                         last = key.objectid + root->fs_info->nodesize;
7083                 path.slots[0]++;
7084         }
7085
7086         if (last < cache->key.objectid + cache->key.offset)
7087                 ret = check_cache_range(root, cache, last,
7088                                         cache->key.objectid +
7089                                         cache->key.offset - last);
7090
7091 out:
7092         btrfs_release_path(&path);
7093
7094         if (!ret &&
7095             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7096                 fprintf(stderr, "There are still entries left in the space "
7097                         "cache\n");
7098                 ret = -EINVAL;
7099         }
7100
7101         return ret;
7102 }
7103
7104 static int check_space_cache(struct btrfs_root *root)
7105 {
7106         struct btrfs_block_group_cache *cache;
7107         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7108         int ret;
7109         int error = 0;
7110
7111         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7112             btrfs_super_generation(root->fs_info->super_copy) !=
7113             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7114                 printf("cache and super generation don't match, space cache "
7115                        "will be invalidated\n");
7116                 return 0;
7117         }
7118
7119         if (ctx.progress_enabled) {
7120                 ctx.tp = TASK_FREE_SPACE;
7121                 task_start(ctx.info);
7122         }
7123
7124         while (1) {
7125                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7126                 if (!cache)
7127                         break;
7128
7129                 start = cache->key.objectid + cache->key.offset;
7130                 if (!cache->free_space_ctl) {
7131                         if (btrfs_init_free_space_ctl(cache,
7132                                                 root->fs_info->sectorsize)) {
7133                                 ret = -ENOMEM;
7134                                 break;
7135                         }
7136                 } else {
7137                         btrfs_remove_free_space_cache(cache);
7138                 }
7139
7140                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7141                         ret = exclude_super_stripes(root, cache);
7142                         if (ret) {
7143                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7144                                         strerror(-ret));
7145                                 error++;
7146                                 continue;
7147                         }
7148                         ret = load_free_space_tree(root->fs_info, cache);
7149                         free_excluded_extents(root, cache);
7150                         if (ret < 0) {
7151                                 fprintf(stderr, "could not load free space tree: %s\n",
7152                                         strerror(-ret));
7153                                 error++;
7154                                 continue;
7155                         }
7156                         error += ret;
7157                 } else {
7158                         ret = load_free_space_cache(root->fs_info, cache);
7159                         if (!ret)
7160                                 continue;
7161                 }
7162
7163                 ret = verify_space_cache(root, cache);
7164                 if (ret) {
7165                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7166                                 cache->key.objectid);
7167                         error++;
7168                 }
7169         }
7170
7171         task_stop(ctx.info);
7172
7173         return error ? -EINVAL : 0;
7174 }
7175
7176 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7177                         u64 num_bytes, unsigned long leaf_offset,
7178                         struct extent_buffer *eb) {
7179
7180         struct btrfs_fs_info *fs_info = root->fs_info;
7181         u64 offset = 0;
7182         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7183         char *data;
7184         unsigned long csum_offset;
7185         u32 csum;
7186         u32 csum_expected;
7187         u64 read_len;
7188         u64 data_checked = 0;
7189         u64 tmp;
7190         int ret = 0;
7191         int mirror;
7192         int num_copies;
7193
7194         if (num_bytes % fs_info->sectorsize)
7195                 return -EINVAL;
7196
7197         data = malloc(num_bytes);
7198         if (!data)
7199                 return -ENOMEM;
7200
7201         while (offset < num_bytes) {
7202                 mirror = 0;
7203 again:
7204                 read_len = num_bytes - offset;
7205                 /* read as much space once a time */
7206                 ret = read_extent_data(fs_info, data + offset,
7207                                 bytenr + offset, &read_len, mirror);
7208                 if (ret)
7209                         goto out;
7210                 data_checked = 0;
7211                 /* verify every 4k data's checksum */
7212                 while (data_checked < read_len) {
7213                         csum = ~(u32)0;
7214                         tmp = offset + data_checked;
7215
7216                         csum = btrfs_csum_data((char *)data + tmp,
7217                                                csum, fs_info->sectorsize);
7218                         btrfs_csum_final(csum, (u8 *)&csum);
7219
7220                         csum_offset = leaf_offset +
7221                                  tmp / fs_info->sectorsize * csum_size;
7222                         read_extent_buffer(eb, (char *)&csum_expected,
7223                                            csum_offset, csum_size);
7224                         /* try another mirror */
7225                         if (csum != csum_expected) {
7226                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7227                                                 mirror, bytenr + tmp,
7228                                                 csum, csum_expected);
7229                                 num_copies = btrfs_num_copies(root->fs_info,
7230                                                 bytenr, num_bytes);
7231                                 if (mirror < num_copies - 1) {
7232                                         mirror += 1;
7233                                         goto again;
7234                                 }
7235                         }
7236                         data_checked += fs_info->sectorsize;
7237                 }
7238                 offset += read_len;
7239         }
7240 out:
7241         free(data);
7242         return ret;
7243 }
7244
7245 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7246                                u64 num_bytes)
7247 {
7248         struct btrfs_path path;
7249         struct extent_buffer *leaf;
7250         struct btrfs_key key;
7251         int ret;
7252
7253         btrfs_init_path(&path);
7254         key.objectid = bytenr;
7255         key.type = BTRFS_EXTENT_ITEM_KEY;
7256         key.offset = (u64)-1;
7257
7258 again:
7259         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7260                                 0, 0);
7261         if (ret < 0) {
7262                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7263                 btrfs_release_path(&path);
7264                 return ret;
7265         } else if (ret) {
7266                 if (path.slots[0] > 0) {
7267                         path.slots[0]--;
7268                 } else {
7269                         ret = btrfs_prev_leaf(root, &path);
7270                         if (ret < 0) {
7271                                 goto out;
7272                         } else if (ret > 0) {
7273                                 ret = 0;
7274                                 goto out;
7275                         }
7276                 }
7277         }
7278
7279         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7280
7281         /*
7282          * Block group items come before extent items if they have the same
7283          * bytenr, so walk back one more just in case.  Dear future traveller,
7284          * first congrats on mastering time travel.  Now if it's not too much
7285          * trouble could you go back to 2006 and tell Chris to make the
7286          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7287          * EXTENT_ITEM_KEY please?
7288          */
7289         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7290                 if (path.slots[0] > 0) {
7291                         path.slots[0]--;
7292                 } else {
7293                         ret = btrfs_prev_leaf(root, &path);
7294                         if (ret < 0) {
7295                                 goto out;
7296                         } else if (ret > 0) {
7297                                 ret = 0;
7298                                 goto out;
7299                         }
7300                 }
7301                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7302         }
7303
7304         while (num_bytes) {
7305                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7306                         ret = btrfs_next_leaf(root, &path);
7307                         if (ret < 0) {
7308                                 fprintf(stderr, "Error going to next leaf "
7309                                         "%d\n", ret);
7310                                 btrfs_release_path(&path);
7311                                 return ret;
7312                         } else if (ret) {
7313                                 break;
7314                         }
7315                 }
7316                 leaf = path.nodes[0];
7317                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7318                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7319                         path.slots[0]++;
7320                         continue;
7321                 }
7322                 if (key.objectid + key.offset < bytenr) {
7323                         path.slots[0]++;
7324                         continue;
7325                 }
7326                 if (key.objectid > bytenr + num_bytes)
7327                         break;
7328
7329                 if (key.objectid == bytenr) {
7330                         if (key.offset >= num_bytes) {
7331                                 num_bytes = 0;
7332                                 break;
7333                         }
7334                         num_bytes -= key.offset;
7335                         bytenr += key.offset;
7336                 } else if (key.objectid < bytenr) {
7337                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7338                                 num_bytes = 0;
7339                                 break;
7340                         }
7341                         num_bytes = (bytenr + num_bytes) -
7342                                 (key.objectid + key.offset);
7343                         bytenr = key.objectid + key.offset;
7344                 } else {
7345                         if (key.objectid + key.offset < bytenr + num_bytes) {
7346                                 u64 new_start = key.objectid + key.offset;
7347                                 u64 new_bytes = bytenr + num_bytes - new_start;
7348
7349                                 /*
7350                                  * Weird case, the extent is in the middle of
7351                                  * our range, we'll have to search one side
7352                                  * and then the other.  Not sure if this happens
7353                                  * in real life, but no harm in coding it up
7354                                  * anyway just in case.
7355                                  */
7356                                 btrfs_release_path(&path);
7357                                 ret = check_extent_exists(root, new_start,
7358                                                           new_bytes);
7359                                 if (ret) {
7360                                         fprintf(stderr, "Right section didn't "
7361                                                 "have a record\n");
7362                                         break;
7363                                 }
7364                                 num_bytes = key.objectid - bytenr;
7365                                 goto again;
7366                         }
7367                         num_bytes = key.objectid - bytenr;
7368                 }
7369                 path.slots[0]++;
7370         }
7371         ret = 0;
7372
7373 out:
7374         if (num_bytes && !ret) {
7375                 fprintf(stderr, "There are no extents for csum range "
7376                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7377                 ret = 1;
7378         }
7379
7380         btrfs_release_path(&path);
7381         return ret;
7382 }
7383
7384 static int check_csums(struct btrfs_root *root)
7385 {
7386         struct btrfs_path path;
7387         struct extent_buffer *leaf;
7388         struct btrfs_key key;
7389         u64 offset = 0, num_bytes = 0;
7390         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7391         int errors = 0;
7392         int ret;
7393         u64 data_len;
7394         unsigned long leaf_offset;
7395
7396         root = root->fs_info->csum_root;
7397         if (!extent_buffer_uptodate(root->node)) {
7398                 fprintf(stderr, "No valid csum tree found\n");
7399                 return -ENOENT;
7400         }
7401
7402         btrfs_init_path(&path);
7403         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7404         key.type = BTRFS_EXTENT_CSUM_KEY;
7405         key.offset = 0;
7406         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7407         if (ret < 0) {
7408                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7409                 btrfs_release_path(&path);
7410                 return ret;
7411         }
7412
7413         if (ret > 0 && path.slots[0])
7414                 path.slots[0]--;
7415         ret = 0;
7416
7417         while (1) {
7418                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7419                         ret = btrfs_next_leaf(root, &path);
7420                         if (ret < 0) {
7421                                 fprintf(stderr, "Error going to next leaf "
7422                                         "%d\n", ret);
7423                                 break;
7424                         }
7425                         if (ret)
7426                                 break;
7427                 }
7428                 leaf = path.nodes[0];
7429
7430                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7431                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7432                         path.slots[0]++;
7433                         continue;
7434                 }
7435
7436                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7437                               csum_size) * root->fs_info->sectorsize;
7438                 if (!check_data_csum)
7439                         goto skip_csum_check;
7440                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7441                 ret = check_extent_csums(root, key.offset, data_len,
7442                                          leaf_offset, leaf);
7443                 if (ret)
7444                         break;
7445 skip_csum_check:
7446                 if (!num_bytes) {
7447                         offset = key.offset;
7448                 } else if (key.offset != offset + num_bytes) {
7449                         ret = check_extent_exists(root, offset, num_bytes);
7450                         if (ret) {
7451                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7452                                         "there is no extent record\n",
7453                                         offset, offset+num_bytes);
7454                                 errors++;
7455                         }
7456                         offset = key.offset;
7457                         num_bytes = 0;
7458                 }
7459                 num_bytes += data_len;
7460                 path.slots[0]++;
7461         }
7462
7463         btrfs_release_path(&path);
7464         return errors;
7465 }
7466
7467 static int is_dropped_key(struct btrfs_key *key,
7468                           struct btrfs_key *drop_key) {
7469         if (key->objectid < drop_key->objectid)
7470                 return 1;
7471         else if (key->objectid == drop_key->objectid) {
7472                 if (key->type < drop_key->type)
7473                         return 1;
7474                 else if (key->type == drop_key->type) {
7475                         if (key->offset < drop_key->offset)
7476                                 return 1;
7477                 }
7478         }
7479         return 0;
7480 }
7481
7482 /*
7483  * Here are the rules for FULL_BACKREF.
7484  *
7485  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7486  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7487  *      FULL_BACKREF set.
7488  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7489  *    if it happened after the relocation occurred since we'll have dropped the
7490  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7491  *    have no real way to know for sure.
7492  *
7493  * We process the blocks one root at a time, and we start from the lowest root
7494  * objectid and go to the highest.  So we can just lookup the owner backref for
7495  * the record and if we don't find it then we know it doesn't exist and we have
7496  * a FULL BACKREF.
7497  *
7498  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7499  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7500  * be set or not and then we can check later once we've gathered all the refs.
7501  */
7502 static int calc_extent_flag(struct cache_tree *extent_cache,
7503                            struct extent_buffer *buf,
7504                            struct root_item_record *ri,
7505                            u64 *flags)
7506 {
7507         struct extent_record *rec;
7508         struct cache_extent *cache;
7509         struct tree_backref *tback;
7510         u64 owner = 0;
7511
7512         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7513         /* we have added this extent before */
7514         if (!cache)
7515                 return -ENOENT;
7516
7517         rec = container_of(cache, struct extent_record, cache);
7518
7519         /*
7520          * Except file/reloc tree, we can not have
7521          * FULL BACKREF MODE
7522          */
7523         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7524                 goto normal;
7525         /*
7526          * root node
7527          */
7528         if (buf->start == ri->bytenr)
7529                 goto normal;
7530
7531         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7532                 goto full_backref;
7533
7534         owner = btrfs_header_owner(buf);
7535         if (owner == ri->objectid)
7536                 goto normal;
7537
7538         tback = find_tree_backref(rec, 0, owner);
7539         if (!tback)
7540                 goto full_backref;
7541 normal:
7542         *flags = 0;
7543         if (rec->flag_block_full_backref != FLAG_UNSET &&
7544             rec->flag_block_full_backref != 0)
7545                 rec->bad_full_backref = 1;
7546         return 0;
7547 full_backref:
7548         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7549         if (rec->flag_block_full_backref != FLAG_UNSET &&
7550             rec->flag_block_full_backref != 1)
7551                 rec->bad_full_backref = 1;
7552         return 0;
7553 }
7554
7555 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7556 {
7557         fprintf(stderr, "Invalid key type(");
7558         print_key_type(stderr, 0, key_type);
7559         fprintf(stderr, ") found in root(");
7560         print_objectid(stderr, rootid, 0);
7561         fprintf(stderr, ")\n");
7562 }
7563
7564 /*
7565  * Check if the key is valid with its extent buffer.
7566  *
7567  * This is a early check in case invalid key exists in a extent buffer
7568  * This is not comprehensive yet, but should prevent wrong key/item passed
7569  * further
7570  */
7571 static int check_type_with_root(u64 rootid, u8 key_type)
7572 {
7573         switch (key_type) {
7574         /* Only valid in chunk tree */
7575         case BTRFS_DEV_ITEM_KEY:
7576         case BTRFS_CHUNK_ITEM_KEY:
7577                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7578                         goto err;
7579                 break;
7580         /* valid in csum and log tree */
7581         case BTRFS_CSUM_TREE_OBJECTID:
7582                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7583                       is_fstree(rootid)))
7584                         goto err;
7585                 break;
7586         case BTRFS_EXTENT_ITEM_KEY:
7587         case BTRFS_METADATA_ITEM_KEY:
7588         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7589                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7590                         goto err;
7591                 break;
7592         case BTRFS_ROOT_ITEM_KEY:
7593                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7594                         goto err;
7595                 break;
7596         case BTRFS_DEV_EXTENT_KEY:
7597                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7598                         goto err;
7599                 break;
7600         }
7601         return 0;
7602 err:
7603         report_mismatch_key_root(key_type, rootid);
7604         return -EINVAL;
7605 }
7606
7607 static int run_next_block(struct btrfs_root *root,
7608                           struct block_info *bits,
7609                           int bits_nr,
7610                           u64 *last,
7611                           struct cache_tree *pending,
7612                           struct cache_tree *seen,
7613                           struct cache_tree *reada,
7614                           struct cache_tree *nodes,
7615                           struct cache_tree *extent_cache,
7616                           struct cache_tree *chunk_cache,
7617                           struct rb_root *dev_cache,
7618                           struct block_group_tree *block_group_cache,
7619                           struct device_extent_tree *dev_extent_cache,
7620                           struct root_item_record *ri)
7621 {
7622         struct btrfs_fs_info *fs_info = root->fs_info;
7623         struct extent_buffer *buf;
7624         struct extent_record *rec = NULL;
7625         u64 bytenr;
7626         u32 size;
7627         u64 parent;
7628         u64 owner;
7629         u64 flags;
7630         u64 ptr;
7631         u64 gen = 0;
7632         int ret = 0;
7633         int i;
7634         int nritems;
7635         struct btrfs_key key;
7636         struct cache_extent *cache;
7637         int reada_bits;
7638
7639         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7640                                     bits_nr, &reada_bits);
7641         if (nritems == 0)
7642                 return 1;
7643
7644         if (!reada_bits) {
7645                 for(i = 0; i < nritems; i++) {
7646                         ret = add_cache_extent(reada, bits[i].start,
7647                                                bits[i].size);
7648                         if (ret == -EEXIST)
7649                                 continue;
7650
7651                         /* fixme, get the parent transid */
7652                         readahead_tree_block(fs_info, bits[i].start, 0);
7653                 }
7654         }
7655         *last = bits[0].start;
7656         bytenr = bits[0].start;
7657         size = bits[0].size;
7658
7659         cache = lookup_cache_extent(pending, bytenr, size);
7660         if (cache) {
7661                 remove_cache_extent(pending, cache);
7662                 free(cache);
7663         }
7664         cache = lookup_cache_extent(reada, bytenr, size);
7665         if (cache) {
7666                 remove_cache_extent(reada, cache);
7667                 free(cache);
7668         }
7669         cache = lookup_cache_extent(nodes, bytenr, size);
7670         if (cache) {
7671                 remove_cache_extent(nodes, cache);
7672                 free(cache);
7673         }
7674         cache = lookup_cache_extent(extent_cache, bytenr, size);
7675         if (cache) {
7676                 rec = container_of(cache, struct extent_record, cache);
7677                 gen = rec->parent_generation;
7678         }
7679
7680         /* fixme, get the real parent transid */
7681         buf = read_tree_block(root->fs_info, bytenr, gen);
7682         if (!extent_buffer_uptodate(buf)) {
7683                 record_bad_block_io(root->fs_info,
7684                                     extent_cache, bytenr, size);
7685                 goto out;
7686         }
7687
7688         nritems = btrfs_header_nritems(buf);
7689
7690         flags = 0;
7691         if (!init_extent_tree) {
7692                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7693                                        btrfs_header_level(buf), 1, NULL,
7694                                        &flags);
7695                 if (ret < 0) {
7696                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7697                         if (ret < 0) {
7698                                 fprintf(stderr, "Couldn't calc extent flags\n");
7699                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7700                         }
7701                 }
7702         } else {
7703                 flags = 0;
7704                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7705                 if (ret < 0) {
7706                         fprintf(stderr, "Couldn't calc extent flags\n");
7707                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7708                 }
7709         }
7710
7711         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7712                 if (ri != NULL &&
7713                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7714                     ri->objectid == btrfs_header_owner(buf)) {
7715                         /*
7716                          * Ok we got to this block from it's original owner and
7717                          * we have FULL_BACKREF set.  Relocation can leave
7718                          * converted blocks over so this is altogether possible,
7719                          * however it's not possible if the generation > the
7720                          * last snapshot, so check for this case.
7721                          */
7722                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7723                             btrfs_header_generation(buf) > ri->last_snapshot) {
7724                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7725                                 rec->bad_full_backref = 1;
7726                         }
7727                 }
7728         } else {
7729                 if (ri != NULL &&
7730                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7731                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7732                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7733                         rec->bad_full_backref = 1;
7734                 }
7735         }
7736
7737         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7738                 rec->flag_block_full_backref = 1;
7739                 parent = bytenr;
7740                 owner = 0;
7741         } else {
7742                 rec->flag_block_full_backref = 0;
7743                 parent = 0;
7744                 owner = btrfs_header_owner(buf);
7745         }
7746
7747         ret = check_block(root, extent_cache, buf, flags);
7748         if (ret)
7749                 goto out;
7750
7751         if (btrfs_is_leaf(buf)) {
7752                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7753                 for (i = 0; i < nritems; i++) {
7754                         struct btrfs_file_extent_item *fi;
7755                         btrfs_item_key_to_cpu(buf, &key, i);
7756                         /*
7757                          * Check key type against the leaf owner.
7758                          * Could filter quite a lot of early error if
7759                          * owner is correct
7760                          */
7761                         if (check_type_with_root(btrfs_header_owner(buf),
7762                                                  key.type)) {
7763                                 fprintf(stderr, "ignoring invalid key\n");
7764                                 continue;
7765                         }
7766                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7767                                 process_extent_item(root, extent_cache, buf,
7768                                                     i);
7769                                 continue;
7770                         }
7771                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7772                                 process_extent_item(root, extent_cache, buf,
7773                                                     i);
7774                                 continue;
7775                         }
7776                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7777                                 total_csum_bytes +=
7778                                         btrfs_item_size_nr(buf, i);
7779                                 continue;
7780                         }
7781                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7782                                 process_chunk_item(chunk_cache, &key, buf, i);
7783                                 continue;
7784                         }
7785                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7786                                 process_device_item(dev_cache, &key, buf, i);
7787                                 continue;
7788                         }
7789                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7790                                 process_block_group_item(block_group_cache,
7791                                         &key, buf, i);
7792                                 continue;
7793                         }
7794                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7795                                 process_device_extent_item(dev_extent_cache,
7796                                         &key, buf, i);
7797                                 continue;
7798
7799                         }
7800                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7801 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7802                                 process_extent_ref_v0(extent_cache, buf, i);
7803 #else
7804                                 BUG();
7805 #endif
7806                                 continue;
7807                         }
7808
7809                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7810                                 ret = add_tree_backref(extent_cache,
7811                                                 key.objectid, 0, key.offset, 0);
7812                                 if (ret < 0)
7813                                         error(
7814                                 "add_tree_backref failed (leaf tree block): %s",
7815                                               strerror(-ret));
7816                                 continue;
7817                         }
7818                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7819                                 ret = add_tree_backref(extent_cache,
7820                                                 key.objectid, key.offset, 0, 0);
7821                                 if (ret < 0)
7822                                         error(
7823                                 "add_tree_backref failed (leaf shared block): %s",
7824                                               strerror(-ret));
7825                                 continue;
7826                         }
7827                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7828                                 struct btrfs_extent_data_ref *ref;
7829                                 ref = btrfs_item_ptr(buf, i,
7830                                                 struct btrfs_extent_data_ref);
7831                                 add_data_backref(extent_cache,
7832                                         key.objectid, 0,
7833                                         btrfs_extent_data_ref_root(buf, ref),
7834                                         btrfs_extent_data_ref_objectid(buf,
7835                                                                        ref),
7836                                         btrfs_extent_data_ref_offset(buf, ref),
7837                                         btrfs_extent_data_ref_count(buf, ref),
7838                                         0, root->fs_info->sectorsize);
7839                                 continue;
7840                         }
7841                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7842                                 struct btrfs_shared_data_ref *ref;
7843                                 ref = btrfs_item_ptr(buf, i,
7844                                                 struct btrfs_shared_data_ref);
7845                                 add_data_backref(extent_cache,
7846                                         key.objectid, key.offset, 0, 0, 0,
7847                                         btrfs_shared_data_ref_count(buf, ref),
7848                                         0, root->fs_info->sectorsize);
7849                                 continue;
7850                         }
7851                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7852                                 struct bad_item *bad;
7853
7854                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7855                                         continue;
7856                                 if (!owner)
7857                                         continue;
7858                                 bad = malloc(sizeof(struct bad_item));
7859                                 if (!bad)
7860                                         continue;
7861                                 INIT_LIST_HEAD(&bad->list);
7862                                 memcpy(&bad->key, &key,
7863                                        sizeof(struct btrfs_key));
7864                                 bad->root_id = owner;
7865                                 list_add_tail(&bad->list, &delete_items);
7866                                 continue;
7867                         }
7868                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7869                                 continue;
7870                         fi = btrfs_item_ptr(buf, i,
7871                                             struct btrfs_file_extent_item);
7872                         if (btrfs_file_extent_type(buf, fi) ==
7873                             BTRFS_FILE_EXTENT_INLINE)
7874                                 continue;
7875                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7876                                 continue;
7877
7878                         data_bytes_allocated +=
7879                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7880                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7881                                 abort();
7882                         }
7883                         data_bytes_referenced +=
7884                                 btrfs_file_extent_num_bytes(buf, fi);
7885                         add_data_backref(extent_cache,
7886                                 btrfs_file_extent_disk_bytenr(buf, fi),
7887                                 parent, owner, key.objectid, key.offset -
7888                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7889                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7890                 }
7891         } else {
7892                 int level;
7893                 struct btrfs_key first_key;
7894
7895                 first_key.objectid = 0;
7896
7897                 if (nritems > 0)
7898                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7899                 level = btrfs_header_level(buf);
7900                 for (i = 0; i < nritems; i++) {
7901                         struct extent_record tmpl;
7902
7903                         ptr = btrfs_node_blockptr(buf, i);
7904                         size = root->fs_info->nodesize;
7905                         btrfs_node_key_to_cpu(buf, &key, i);
7906                         if (ri != NULL) {
7907                                 if ((level == ri->drop_level)
7908                                     && is_dropped_key(&key, &ri->drop_key)) {
7909                                         continue;
7910                                 }
7911                         }
7912
7913                         memset(&tmpl, 0, sizeof(tmpl));
7914                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7915                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7916                         tmpl.start = ptr;
7917                         tmpl.nr = size;
7918                         tmpl.refs = 1;
7919                         tmpl.metadata = 1;
7920                         tmpl.max_size = size;
7921                         ret = add_extent_rec(extent_cache, &tmpl);
7922                         if (ret < 0)
7923                                 goto out;
7924
7925                         ret = add_tree_backref(extent_cache, ptr, parent,
7926                                         owner, 1);
7927                         if (ret < 0) {
7928                                 error(
7929                                 "add_tree_backref failed (non-leaf block): %s",
7930                                       strerror(-ret));
7931                                 continue;
7932                         }
7933
7934                         if (level > 1) {
7935                                 add_pending(nodes, seen, ptr, size);
7936                         } else {
7937                                 add_pending(pending, seen, ptr, size);
7938                         }
7939                 }
7940                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7941                                       nritems) * sizeof(struct btrfs_key_ptr);
7942         }
7943         total_btree_bytes += buf->len;
7944         if (fs_root_objectid(btrfs_header_owner(buf)))
7945                 total_fs_tree_bytes += buf->len;
7946         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7947                 total_extent_tree_bytes += buf->len;
7948         if (!found_old_backref &&
7949             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7950             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7951             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7952                 found_old_backref = 1;
7953 out:
7954         free_extent_buffer(buf);
7955         return ret;
7956 }
7957
7958 static int add_root_to_pending(struct extent_buffer *buf,
7959                                struct cache_tree *extent_cache,
7960                                struct cache_tree *pending,
7961                                struct cache_tree *seen,
7962                                struct cache_tree *nodes,
7963                                u64 objectid)
7964 {
7965         struct extent_record tmpl;
7966         int ret;
7967
7968         if (btrfs_header_level(buf) > 0)
7969                 add_pending(nodes, seen, buf->start, buf->len);
7970         else
7971                 add_pending(pending, seen, buf->start, buf->len);
7972
7973         memset(&tmpl, 0, sizeof(tmpl));
7974         tmpl.start = buf->start;
7975         tmpl.nr = buf->len;
7976         tmpl.is_root = 1;
7977         tmpl.refs = 1;
7978         tmpl.metadata = 1;
7979         tmpl.max_size = buf->len;
7980         add_extent_rec(extent_cache, &tmpl);
7981
7982         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7983             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7984                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7985                                 0, 1);
7986         else
7987                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7988                                 1);
7989         return ret;
7990 }
7991
7992 /* as we fix the tree, we might be deleting blocks that
7993  * we're tracking for repair.  This hook makes sure we
7994  * remove any backrefs for blocks as we are fixing them.
7995  */
7996 static int free_extent_hook(struct btrfs_trans_handle *trans,
7997                             struct btrfs_root *root,
7998                             u64 bytenr, u64 num_bytes, u64 parent,
7999                             u64 root_objectid, u64 owner, u64 offset,
8000                             int refs_to_drop)
8001 {
8002         struct extent_record *rec;
8003         struct cache_extent *cache;
8004         int is_data;
8005         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8006
8007         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8008         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8009         if (!cache)
8010                 return 0;
8011
8012         rec = container_of(cache, struct extent_record, cache);
8013         if (is_data) {
8014                 struct data_backref *back;
8015                 back = find_data_backref(rec, parent, root_objectid, owner,
8016                                          offset, 1, bytenr, num_bytes);
8017                 if (!back)
8018                         goto out;
8019                 if (back->node.found_ref) {
8020                         back->found_ref -= refs_to_drop;
8021                         if (rec->refs)
8022                                 rec->refs -= refs_to_drop;
8023                 }
8024                 if (back->node.found_extent_tree) {
8025                         back->num_refs -= refs_to_drop;
8026                         if (rec->extent_item_refs)
8027                                 rec->extent_item_refs -= refs_to_drop;
8028                 }
8029                 if (back->found_ref == 0)
8030                         back->node.found_ref = 0;
8031                 if (back->num_refs == 0)
8032                         back->node.found_extent_tree = 0;
8033
8034                 if (!back->node.found_extent_tree && back->node.found_ref) {
8035                         list_del(&back->node.list);
8036                         free(back);
8037                 }
8038         } else {
8039                 struct tree_backref *back;
8040                 back = find_tree_backref(rec, parent, root_objectid);
8041                 if (!back)
8042                         goto out;
8043                 if (back->node.found_ref) {
8044                         if (rec->refs)
8045                                 rec->refs--;
8046                         back->node.found_ref = 0;
8047                 }
8048                 if (back->node.found_extent_tree) {
8049                         if (rec->extent_item_refs)
8050                                 rec->extent_item_refs--;
8051                         back->node.found_extent_tree = 0;
8052                 }
8053                 if (!back->node.found_extent_tree && back->node.found_ref) {
8054                         list_del(&back->node.list);
8055                         free(back);
8056                 }
8057         }
8058         maybe_free_extent_rec(extent_cache, rec);
8059 out:
8060         return 0;
8061 }
8062
8063 static int delete_extent_records(struct btrfs_trans_handle *trans,
8064                                  struct btrfs_root *root,
8065                                  struct btrfs_path *path,
8066                                  u64 bytenr)
8067 {
8068         struct btrfs_key key;
8069         struct btrfs_key found_key;
8070         struct extent_buffer *leaf;
8071         int ret;
8072         int slot;
8073
8074
8075         key.objectid = bytenr;
8076         key.type = (u8)-1;
8077         key.offset = (u64)-1;
8078
8079         while(1) {
8080                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8081                                         &key, path, 0, 1);
8082                 if (ret < 0)
8083                         break;
8084
8085                 if (ret > 0) {
8086                         ret = 0;
8087                         if (path->slots[0] == 0)
8088                                 break;
8089                         path->slots[0]--;
8090                 }
8091                 ret = 0;
8092
8093                 leaf = path->nodes[0];
8094                 slot = path->slots[0];
8095
8096                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8097                 if (found_key.objectid != bytenr)
8098                         break;
8099
8100                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8101                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8102                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8103                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8104                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8105                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8106                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8107                         btrfs_release_path(path);
8108                         if (found_key.type == 0) {
8109                                 if (found_key.offset == 0)
8110                                         break;
8111                                 key.offset = found_key.offset - 1;
8112                                 key.type = found_key.type;
8113                         }
8114                         key.type = found_key.type - 1;
8115                         key.offset = (u64)-1;
8116                         continue;
8117                 }
8118
8119                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8120                         found_key.objectid, found_key.type, found_key.offset);
8121
8122                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8123                 if (ret)
8124                         break;
8125                 btrfs_release_path(path);
8126
8127                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8128                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8129                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8130                                 found_key.offset : root->fs_info->nodesize;
8131
8132                         ret = btrfs_update_block_group(trans, root, bytenr,
8133                                                        bytes, 0, 0);
8134                         if (ret)
8135                                 break;
8136                 }
8137         }
8138
8139         btrfs_release_path(path);
8140         return ret;
8141 }
8142
8143 /*
8144  * for a single backref, this will allocate a new extent
8145  * and add the backref to it.
8146  */
8147 static int record_extent(struct btrfs_trans_handle *trans,
8148                          struct btrfs_fs_info *info,
8149                          struct btrfs_path *path,
8150                          struct extent_record *rec,
8151                          struct extent_backref *back,
8152                          int allocated, u64 flags)
8153 {
8154         int ret = 0;
8155         struct btrfs_root *extent_root = info->extent_root;
8156         struct extent_buffer *leaf;
8157         struct btrfs_key ins_key;
8158         struct btrfs_extent_item *ei;
8159         struct data_backref *dback;
8160         struct btrfs_tree_block_info *bi;
8161
8162         if (!back->is_data)
8163                 rec->max_size = max_t(u64, rec->max_size,
8164                                     info->nodesize);
8165
8166         if (!allocated) {
8167                 u32 item_size = sizeof(*ei);
8168
8169                 if (!back->is_data)
8170                         item_size += sizeof(*bi);
8171
8172                 ins_key.objectid = rec->start;
8173                 ins_key.offset = rec->max_size;
8174                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8175
8176                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8177                                         &ins_key, item_size);
8178                 if (ret)
8179                         goto fail;
8180
8181                 leaf = path->nodes[0];
8182                 ei = btrfs_item_ptr(leaf, path->slots[0],
8183                                     struct btrfs_extent_item);
8184
8185                 btrfs_set_extent_refs(leaf, ei, 0);
8186                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8187
8188                 if (back->is_data) {
8189                         btrfs_set_extent_flags(leaf, ei,
8190                                                BTRFS_EXTENT_FLAG_DATA);
8191                 } else {
8192                         struct btrfs_disk_key copy_key;;
8193
8194                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8195                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8196                                              sizeof(*bi));
8197
8198                         btrfs_set_disk_key_objectid(&copy_key,
8199                                                     rec->info_objectid);
8200                         btrfs_set_disk_key_type(&copy_key, 0);
8201                         btrfs_set_disk_key_offset(&copy_key, 0);
8202
8203                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8204                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8205
8206                         btrfs_set_extent_flags(leaf, ei,
8207                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8208                 }
8209
8210                 btrfs_mark_buffer_dirty(leaf);
8211                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8212                                                rec->max_size, 1, 0);
8213                 if (ret)
8214                         goto fail;
8215                 btrfs_release_path(path);
8216         }
8217
8218         if (back->is_data) {
8219                 u64 parent;
8220                 int i;
8221
8222                 dback = to_data_backref(back);
8223                 if (back->full_backref)
8224                         parent = dback->parent;
8225                 else
8226                         parent = 0;
8227
8228                 for (i = 0; i < dback->found_ref; i++) {
8229                         /* if parent != 0, we're doing a full backref
8230                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8231                          * just makes the backref allocator create a data
8232                          * backref
8233                          */
8234                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8235                                                    rec->start, rec->max_size,
8236                                                    parent,
8237                                                    dback->root,
8238                                                    parent ?
8239                                                    BTRFS_FIRST_FREE_OBJECTID :
8240                                                    dback->owner,
8241                                                    dback->offset);
8242                         if (ret)
8243                                 break;
8244                 }
8245                 fprintf(stderr, "adding new data backref"
8246                                 " on %llu %s %llu owner %llu"
8247                                 " offset %llu found %d\n",
8248                                 (unsigned long long)rec->start,
8249                                 back->full_backref ?
8250                                 "parent" : "root",
8251                                 back->full_backref ?
8252                                 (unsigned long long)parent :
8253                                 (unsigned long long)dback->root,
8254                                 (unsigned long long)dback->owner,
8255                                 (unsigned long long)dback->offset,
8256                                 dback->found_ref);
8257         } else {
8258                 u64 parent;
8259                 struct tree_backref *tback;
8260
8261                 tback = to_tree_backref(back);
8262                 if (back->full_backref)
8263                         parent = tback->parent;
8264                 else
8265                         parent = 0;
8266
8267                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8268                                            rec->start, rec->max_size,
8269                                            parent, tback->root, 0, 0);
8270                 fprintf(stderr, "adding new tree backref on "
8271                         "start %llu len %llu parent %llu root %llu\n",
8272                         rec->start, rec->max_size, parent, tback->root);
8273         }
8274 fail:
8275         btrfs_release_path(path);
8276         return ret;
8277 }
8278
8279 static struct extent_entry *find_entry(struct list_head *entries,
8280                                        u64 bytenr, u64 bytes)
8281 {
8282         struct extent_entry *entry = NULL;
8283
8284         list_for_each_entry(entry, entries, list) {
8285                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8286                         return entry;
8287         }
8288
8289         return NULL;
8290 }
8291
8292 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8293 {
8294         struct extent_entry *entry, *best = NULL, *prev = NULL;
8295
8296         list_for_each_entry(entry, entries, list) {
8297                 /*
8298                  * If there are as many broken entries as entries then we know
8299                  * not to trust this particular entry.
8300                  */
8301                 if (entry->broken == entry->count)
8302                         continue;
8303
8304                 /*
8305                  * Special case, when there are only two entries and 'best' is
8306                  * the first one
8307                  */
8308                 if (!prev) {
8309                         best = entry;
8310                         prev = entry;
8311                         continue;
8312                 }
8313
8314                 /*
8315                  * If our current entry == best then we can't be sure our best
8316                  * is really the best, so we need to keep searching.
8317                  */
8318                 if (best && best->count == entry->count) {
8319                         prev = entry;
8320                         best = NULL;
8321                         continue;
8322                 }
8323
8324                 /* Prev == entry, not good enough, have to keep searching */
8325                 if (!prev->broken && prev->count == entry->count)
8326                         continue;
8327
8328                 if (!best)
8329                         best = (prev->count > entry->count) ? prev : entry;
8330                 else if (best->count < entry->count)
8331                         best = entry;
8332                 prev = entry;
8333         }
8334
8335         return best;
8336 }
8337
8338 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8339                       struct data_backref *dback, struct extent_entry *entry)
8340 {
8341         struct btrfs_trans_handle *trans;
8342         struct btrfs_root *root;
8343         struct btrfs_file_extent_item *fi;
8344         struct extent_buffer *leaf;
8345         struct btrfs_key key;
8346         u64 bytenr, bytes;
8347         int ret, err;
8348
8349         key.objectid = dback->root;
8350         key.type = BTRFS_ROOT_ITEM_KEY;
8351         key.offset = (u64)-1;
8352         root = btrfs_read_fs_root(info, &key);
8353         if (IS_ERR(root)) {
8354                 fprintf(stderr, "Couldn't find root for our ref\n");
8355                 return -EINVAL;
8356         }
8357
8358         /*
8359          * The backref points to the original offset of the extent if it was
8360          * split, so we need to search down to the offset we have and then walk
8361          * forward until we find the backref we're looking for.
8362          */
8363         key.objectid = dback->owner;
8364         key.type = BTRFS_EXTENT_DATA_KEY;
8365         key.offset = dback->offset;
8366         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8367         if (ret < 0) {
8368                 fprintf(stderr, "Error looking up ref %d\n", ret);
8369                 return ret;
8370         }
8371
8372         while (1) {
8373                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8374                         ret = btrfs_next_leaf(root, path);
8375                         if (ret) {
8376                                 fprintf(stderr, "Couldn't find our ref, next\n");
8377                                 return -EINVAL;
8378                         }
8379                 }
8380                 leaf = path->nodes[0];
8381                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8382                 if (key.objectid != dback->owner ||
8383                     key.type != BTRFS_EXTENT_DATA_KEY) {
8384                         fprintf(stderr, "Couldn't find our ref, search\n");
8385                         return -EINVAL;
8386                 }
8387                 fi = btrfs_item_ptr(leaf, path->slots[0],
8388                                     struct btrfs_file_extent_item);
8389                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8390                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8391
8392                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8393                         break;
8394                 path->slots[0]++;
8395         }
8396
8397         btrfs_release_path(path);
8398
8399         trans = btrfs_start_transaction(root, 1);
8400         if (IS_ERR(trans))
8401                 return PTR_ERR(trans);
8402
8403         /*
8404          * Ok we have the key of the file extent we want to fix, now we can cow
8405          * down to the thing and fix it.
8406          */
8407         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8408         if (ret < 0) {
8409                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8410                         key.objectid, key.type, key.offset, ret);
8411                 goto out;
8412         }
8413         if (ret > 0) {
8414                 fprintf(stderr, "Well that's odd, we just found this key "
8415                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8416                         key.offset);
8417                 ret = -EINVAL;
8418                 goto out;
8419         }
8420         leaf = path->nodes[0];
8421         fi = btrfs_item_ptr(leaf, path->slots[0],
8422                             struct btrfs_file_extent_item);
8423
8424         if (btrfs_file_extent_compression(leaf, fi) &&
8425             dback->disk_bytenr != entry->bytenr) {
8426                 fprintf(stderr, "Ref doesn't match the record start and is "
8427                         "compressed, please take a btrfs-image of this file "
8428                         "system and send it to a btrfs developer so they can "
8429                         "complete this functionality for bytenr %Lu\n",
8430                         dback->disk_bytenr);
8431                 ret = -EINVAL;
8432                 goto out;
8433         }
8434
8435         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8436                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8437         } else if (dback->disk_bytenr > entry->bytenr) {
8438                 u64 off_diff, offset;
8439
8440                 off_diff = dback->disk_bytenr - entry->bytenr;
8441                 offset = btrfs_file_extent_offset(leaf, fi);
8442                 if (dback->disk_bytenr + offset +
8443                     btrfs_file_extent_num_bytes(leaf, fi) >
8444                     entry->bytenr + entry->bytes) {
8445                         fprintf(stderr, "Ref is past the entry end, please "
8446                                 "take a btrfs-image of this file system and "
8447                                 "send it to a btrfs developer, ref %Lu\n",
8448                                 dback->disk_bytenr);
8449                         ret = -EINVAL;
8450                         goto out;
8451                 }
8452                 offset += off_diff;
8453                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8454                 btrfs_set_file_extent_offset(leaf, fi, offset);
8455         } else if (dback->disk_bytenr < entry->bytenr) {
8456                 u64 offset;
8457
8458                 offset = btrfs_file_extent_offset(leaf, fi);
8459                 if (dback->disk_bytenr + offset < entry->bytenr) {
8460                         fprintf(stderr, "Ref is before the entry start, please"
8461                                 " take a btrfs-image of this file system and "
8462                                 "send it to a btrfs developer, ref %Lu\n",
8463                                 dback->disk_bytenr);
8464                         ret = -EINVAL;
8465                         goto out;
8466                 }
8467
8468                 offset += dback->disk_bytenr;
8469                 offset -= entry->bytenr;
8470                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8471                 btrfs_set_file_extent_offset(leaf, fi, offset);
8472         }
8473
8474         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8475
8476         /*
8477          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8478          * only do this if we aren't using compression, otherwise it's a
8479          * trickier case.
8480          */
8481         if (!btrfs_file_extent_compression(leaf, fi))
8482                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8483         else
8484                 printf("ram bytes may be wrong?\n");
8485         btrfs_mark_buffer_dirty(leaf);
8486 out:
8487         err = btrfs_commit_transaction(trans, root);
8488         btrfs_release_path(path);
8489         return ret ? ret : err;
8490 }
8491
8492 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8493                            struct extent_record *rec)
8494 {
8495         struct extent_backref *back;
8496         struct data_backref *dback;
8497         struct extent_entry *entry, *best = NULL;
8498         LIST_HEAD(entries);
8499         int nr_entries = 0;
8500         int broken_entries = 0;
8501         int ret = 0;
8502         short mismatch = 0;
8503
8504         /*
8505          * Metadata is easy and the backrefs should always agree on bytenr and
8506          * size, if not we've got bigger issues.
8507          */
8508         if (rec->metadata)
8509                 return 0;
8510
8511         list_for_each_entry(back, &rec->backrefs, list) {
8512                 if (back->full_backref || !back->is_data)
8513                         continue;
8514
8515                 dback = to_data_backref(back);
8516
8517                 /*
8518                  * We only pay attention to backrefs that we found a real
8519                  * backref for.
8520                  */
8521                 if (dback->found_ref == 0)
8522                         continue;
8523
8524                 /*
8525                  * For now we only catch when the bytes don't match, not the
8526                  * bytenr.  We can easily do this at the same time, but I want
8527                  * to have a fs image to test on before we just add repair
8528                  * functionality willy-nilly so we know we won't screw up the
8529                  * repair.
8530                  */
8531
8532                 entry = find_entry(&entries, dback->disk_bytenr,
8533                                    dback->bytes);
8534                 if (!entry) {
8535                         entry = malloc(sizeof(struct extent_entry));
8536                         if (!entry) {
8537                                 ret = -ENOMEM;
8538                                 goto out;
8539                         }
8540                         memset(entry, 0, sizeof(*entry));
8541                         entry->bytenr = dback->disk_bytenr;
8542                         entry->bytes = dback->bytes;
8543                         list_add_tail(&entry->list, &entries);
8544                         nr_entries++;
8545                 }
8546
8547                 /*
8548                  * If we only have on entry we may think the entries agree when
8549                  * in reality they don't so we have to do some extra checking.
8550                  */
8551                 if (dback->disk_bytenr != rec->start ||
8552                     dback->bytes != rec->nr || back->broken)
8553                         mismatch = 1;
8554
8555                 if (back->broken) {
8556                         entry->broken++;
8557                         broken_entries++;
8558                 }
8559
8560                 entry->count++;
8561         }
8562
8563         /* Yay all the backrefs agree, carry on good sir */
8564         if (nr_entries <= 1 && !mismatch)
8565                 goto out;
8566
8567         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8568                 "%Lu\n", rec->start);
8569
8570         /*
8571          * First we want to see if the backrefs can agree amongst themselves who
8572          * is right, so figure out which one of the entries has the highest
8573          * count.
8574          */
8575         best = find_most_right_entry(&entries);
8576
8577         /*
8578          * Ok so we may have an even split between what the backrefs think, so
8579          * this is where we use the extent ref to see what it thinks.
8580          */
8581         if (!best) {
8582                 entry = find_entry(&entries, rec->start, rec->nr);
8583                 if (!entry && (!broken_entries || !rec->found_rec)) {
8584                         fprintf(stderr, "Backrefs don't agree with each other "
8585                                 "and extent record doesn't agree with anybody,"
8586                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8587                                 rec->start, rec->nr);
8588                         ret = -EINVAL;
8589                         goto out;
8590                 } else if (!entry) {
8591                         /*
8592                          * Ok our backrefs were broken, we'll assume this is the
8593                          * correct value and add an entry for this range.
8594                          */
8595                         entry = malloc(sizeof(struct extent_entry));
8596                         if (!entry) {
8597                                 ret = -ENOMEM;
8598                                 goto out;
8599                         }
8600                         memset(entry, 0, sizeof(*entry));
8601                         entry->bytenr = rec->start;
8602                         entry->bytes = rec->nr;
8603                         list_add_tail(&entry->list, &entries);
8604                         nr_entries++;
8605                 }
8606                 entry->count++;
8607                 best = find_most_right_entry(&entries);
8608                 if (!best) {
8609                         fprintf(stderr, "Backrefs and extent record evenly "
8610                                 "split on who is right, this is going to "
8611                                 "require user input to fix bytenr %Lu bytes "
8612                                 "%Lu\n", rec->start, rec->nr);
8613                         ret = -EINVAL;
8614                         goto out;
8615                 }
8616         }
8617
8618         /*
8619          * I don't think this can happen currently as we'll abort() if we catch
8620          * this case higher up, but in case somebody removes that we still can't
8621          * deal with it properly here yet, so just bail out of that's the case.
8622          */
8623         if (best->bytenr != rec->start) {
8624                 fprintf(stderr, "Extent start and backref starts don't match, "
8625                         "please use btrfs-image on this file system and send "
8626                         "it to a btrfs developer so they can make fsck fix "
8627                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8628                         rec->start, rec->nr);
8629                 ret = -EINVAL;
8630                 goto out;
8631         }
8632
8633         /*
8634          * Ok great we all agreed on an extent record, let's go find the real
8635          * references and fix up the ones that don't match.
8636          */
8637         list_for_each_entry(back, &rec->backrefs, list) {
8638                 if (back->full_backref || !back->is_data)
8639                         continue;
8640
8641                 dback = to_data_backref(back);
8642
8643                 /*
8644                  * Still ignoring backrefs that don't have a real ref attached
8645                  * to them.
8646                  */
8647                 if (dback->found_ref == 0)
8648                         continue;
8649
8650                 if (dback->bytes == best->bytes &&
8651                     dback->disk_bytenr == best->bytenr)
8652                         continue;
8653
8654                 ret = repair_ref(info, path, dback, best);
8655                 if (ret)
8656                         goto out;
8657         }
8658
8659         /*
8660          * Ok we messed with the actual refs, which means we need to drop our
8661          * entire cache and go back and rescan.  I know this is a huge pain and
8662          * adds a lot of extra work, but it's the only way to be safe.  Once all
8663          * the backrefs agree we may not need to do anything to the extent
8664          * record itself.
8665          */
8666         ret = -EAGAIN;
8667 out:
8668         while (!list_empty(&entries)) {
8669                 entry = list_entry(entries.next, struct extent_entry, list);
8670                 list_del_init(&entry->list);
8671                 free(entry);
8672         }
8673         return ret;
8674 }
8675
8676 static int process_duplicates(struct cache_tree *extent_cache,
8677                               struct extent_record *rec)
8678 {
8679         struct extent_record *good, *tmp;
8680         struct cache_extent *cache;
8681         int ret;
8682
8683         /*
8684          * If we found a extent record for this extent then return, or if we
8685          * have more than one duplicate we are likely going to need to delete
8686          * something.
8687          */
8688         if (rec->found_rec || rec->num_duplicates > 1)
8689                 return 0;
8690
8691         /* Shouldn't happen but just in case */
8692         BUG_ON(!rec->num_duplicates);
8693
8694         /*
8695          * So this happens if we end up with a backref that doesn't match the
8696          * actual extent entry.  So either the backref is bad or the extent
8697          * entry is bad.  Either way we want to have the extent_record actually
8698          * reflect what we found in the extent_tree, so we need to take the
8699          * duplicate out and use that as the extent_record since the only way we
8700          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8701          */
8702         remove_cache_extent(extent_cache, &rec->cache);
8703
8704         good = to_extent_record(rec->dups.next);
8705         list_del_init(&good->list);
8706         INIT_LIST_HEAD(&good->backrefs);
8707         INIT_LIST_HEAD(&good->dups);
8708         good->cache.start = good->start;
8709         good->cache.size = good->nr;
8710         good->content_checked = 0;
8711         good->owner_ref_checked = 0;
8712         good->num_duplicates = 0;
8713         good->refs = rec->refs;
8714         list_splice_init(&rec->backrefs, &good->backrefs);
8715         while (1) {
8716                 cache = lookup_cache_extent(extent_cache, good->start,
8717                                             good->nr);
8718                 if (!cache)
8719                         break;
8720                 tmp = container_of(cache, struct extent_record, cache);
8721
8722                 /*
8723                  * If we find another overlapping extent and it's found_rec is
8724                  * set then it's a duplicate and we need to try and delete
8725                  * something.
8726                  */
8727                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8728                         if (list_empty(&good->list))
8729                                 list_add_tail(&good->list,
8730                                               &duplicate_extents);
8731                         good->num_duplicates += tmp->num_duplicates + 1;
8732                         list_splice_init(&tmp->dups, &good->dups);
8733                         list_del_init(&tmp->list);
8734                         list_add_tail(&tmp->list, &good->dups);
8735                         remove_cache_extent(extent_cache, &tmp->cache);
8736                         continue;
8737                 }
8738
8739                 /*
8740                  * Ok we have another non extent item backed extent rec, so lets
8741                  * just add it to this extent and carry on like we did above.
8742                  */
8743                 good->refs += tmp->refs;
8744                 list_splice_init(&tmp->backrefs, &good->backrefs);
8745                 remove_cache_extent(extent_cache, &tmp->cache);
8746                 free(tmp);
8747         }
8748         ret = insert_cache_extent(extent_cache, &good->cache);
8749         BUG_ON(ret);
8750         free(rec);
8751         return good->num_duplicates ? 0 : 1;
8752 }
8753
8754 static int delete_duplicate_records(struct btrfs_root *root,
8755                                     struct extent_record *rec)
8756 {
8757         struct btrfs_trans_handle *trans;
8758         LIST_HEAD(delete_list);
8759         struct btrfs_path path;
8760         struct extent_record *tmp, *good, *n;
8761         int nr_del = 0;
8762         int ret = 0, err;
8763         struct btrfs_key key;
8764
8765         btrfs_init_path(&path);
8766
8767         good = rec;
8768         /* Find the record that covers all of the duplicates. */
8769         list_for_each_entry(tmp, &rec->dups, list) {
8770                 if (good->start < tmp->start)
8771                         continue;
8772                 if (good->nr > tmp->nr)
8773                         continue;
8774
8775                 if (tmp->start + tmp->nr < good->start + good->nr) {
8776                         fprintf(stderr, "Ok we have overlapping extents that "
8777                                 "aren't completely covered by each other, this "
8778                                 "is going to require more careful thought.  "
8779                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8780                                 tmp->start, tmp->nr, good->start, good->nr);
8781                         abort();
8782                 }
8783                 good = tmp;
8784         }
8785
8786         if (good != rec)
8787                 list_add_tail(&rec->list, &delete_list);
8788
8789         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8790                 if (tmp == good)
8791                         continue;
8792                 list_move_tail(&tmp->list, &delete_list);
8793         }
8794
8795         root = root->fs_info->extent_root;
8796         trans = btrfs_start_transaction(root, 1);
8797         if (IS_ERR(trans)) {
8798                 ret = PTR_ERR(trans);
8799                 goto out;
8800         }
8801
8802         list_for_each_entry(tmp, &delete_list, list) {
8803                 if (tmp->found_rec == 0)
8804                         continue;
8805                 key.objectid = tmp->start;
8806                 key.type = BTRFS_EXTENT_ITEM_KEY;
8807                 key.offset = tmp->nr;
8808
8809                 /* Shouldn't happen but just in case */
8810                 if (tmp->metadata) {
8811                         fprintf(stderr, "Well this shouldn't happen, extent "
8812                                 "record overlaps but is metadata? "
8813                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8814                         abort();
8815                 }
8816
8817                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8818                 if (ret) {
8819                         if (ret > 0)
8820                                 ret = -EINVAL;
8821                         break;
8822                 }
8823                 ret = btrfs_del_item(trans, root, &path);
8824                 if (ret)
8825                         break;
8826                 btrfs_release_path(&path);
8827                 nr_del++;
8828         }
8829         err = btrfs_commit_transaction(trans, root);
8830         if (err && !ret)
8831                 ret = err;
8832 out:
8833         while (!list_empty(&delete_list)) {
8834                 tmp = to_extent_record(delete_list.next);
8835                 list_del_init(&tmp->list);
8836                 if (tmp == rec)
8837                         continue;
8838                 free(tmp);
8839         }
8840
8841         while (!list_empty(&rec->dups)) {
8842                 tmp = to_extent_record(rec->dups.next);
8843                 list_del_init(&tmp->list);
8844                 free(tmp);
8845         }
8846
8847         btrfs_release_path(&path);
8848
8849         if (!ret && !nr_del)
8850                 rec->num_duplicates = 0;
8851
8852         return ret ? ret : nr_del;
8853 }
8854
8855 static int find_possible_backrefs(struct btrfs_fs_info *info,
8856                                   struct btrfs_path *path,
8857                                   struct cache_tree *extent_cache,
8858                                   struct extent_record *rec)
8859 {
8860         struct btrfs_root *root;
8861         struct extent_backref *back;
8862         struct data_backref *dback;
8863         struct cache_extent *cache;
8864         struct btrfs_file_extent_item *fi;
8865         struct btrfs_key key;
8866         u64 bytenr, bytes;
8867         int ret;
8868
8869         list_for_each_entry(back, &rec->backrefs, list) {
8870                 /* Don't care about full backrefs (poor unloved backrefs) */
8871                 if (back->full_backref || !back->is_data)
8872                         continue;
8873
8874                 dback = to_data_backref(back);
8875
8876                 /* We found this one, we don't need to do a lookup */
8877                 if (dback->found_ref)
8878                         continue;
8879
8880                 key.objectid = dback->root;
8881                 key.type = BTRFS_ROOT_ITEM_KEY;
8882                 key.offset = (u64)-1;
8883
8884                 root = btrfs_read_fs_root(info, &key);
8885
8886                 /* No root, definitely a bad ref, skip */
8887                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8888                         continue;
8889                 /* Other err, exit */
8890                 if (IS_ERR(root))
8891                         return PTR_ERR(root);
8892
8893                 key.objectid = dback->owner;
8894                 key.type = BTRFS_EXTENT_DATA_KEY;
8895                 key.offset = dback->offset;
8896                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8897                 if (ret) {
8898                         btrfs_release_path(path);
8899                         if (ret < 0)
8900                                 return ret;
8901                         /* Didn't find it, we can carry on */
8902                         ret = 0;
8903                         continue;
8904                 }
8905
8906                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8907                                     struct btrfs_file_extent_item);
8908                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8909                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8910                 btrfs_release_path(path);
8911                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8912                 if (cache) {
8913                         struct extent_record *tmp;
8914                         tmp = container_of(cache, struct extent_record, cache);
8915
8916                         /*
8917                          * If we found an extent record for the bytenr for this
8918                          * particular backref then we can't add it to our
8919                          * current extent record.  We only want to add backrefs
8920                          * that don't have a corresponding extent item in the
8921                          * extent tree since they likely belong to this record
8922                          * and we need to fix it if it doesn't match bytenrs.
8923                          */
8924                         if  (tmp->found_rec)
8925                                 continue;
8926                 }
8927
8928                 dback->found_ref += 1;
8929                 dback->disk_bytenr = bytenr;
8930                 dback->bytes = bytes;
8931
8932                 /*
8933                  * Set this so the verify backref code knows not to trust the
8934                  * values in this backref.
8935                  */
8936                 back->broken = 1;
8937         }
8938
8939         return 0;
8940 }
8941
8942 /*
8943  * Record orphan data ref into corresponding root.
8944  *
8945  * Return 0 if the extent item contains data ref and recorded.
8946  * Return 1 if the extent item contains no useful data ref
8947  *   On that case, it may contains only shared_dataref or metadata backref
8948  *   or the file extent exists(this should be handled by the extent bytenr
8949  *   recovery routine)
8950  * Return <0 if something goes wrong.
8951  */
8952 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8953                                       struct extent_record *rec)
8954 {
8955         struct btrfs_key key;
8956         struct btrfs_root *dest_root;
8957         struct extent_backref *back;
8958         struct data_backref *dback;
8959         struct orphan_data_extent *orphan;
8960         struct btrfs_path path;
8961         int recorded_data_ref = 0;
8962         int ret = 0;
8963
8964         if (rec->metadata)
8965                 return 1;
8966         btrfs_init_path(&path);
8967         list_for_each_entry(back, &rec->backrefs, list) {
8968                 if (back->full_backref || !back->is_data ||
8969                     !back->found_extent_tree)
8970                         continue;
8971                 dback = to_data_backref(back);
8972                 if (dback->found_ref)
8973                         continue;
8974                 key.objectid = dback->root;
8975                 key.type = BTRFS_ROOT_ITEM_KEY;
8976                 key.offset = (u64)-1;
8977
8978                 dest_root = btrfs_read_fs_root(fs_info, &key);
8979
8980                 /* For non-exist root we just skip it */
8981                 if (IS_ERR(dest_root) || !dest_root)
8982                         continue;
8983
8984                 key.objectid = dback->owner;
8985                 key.type = BTRFS_EXTENT_DATA_KEY;
8986                 key.offset = dback->offset;
8987
8988                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8989                 btrfs_release_path(&path);
8990                 /*
8991                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8992                  * we need to record it for inode/file extent rebuild.
8993                  * For ret > 0, we record it only for file extent rebuild.
8994                  * For ret == 0, the file extent exists but only bytenr
8995                  * mismatch, let the original bytenr fix routine to handle,
8996                  * don't record it.
8997                  */
8998                 if (ret == 0)
8999                         continue;
9000                 ret = 0;
9001                 orphan = malloc(sizeof(*orphan));
9002                 if (!orphan) {
9003                         ret = -ENOMEM;
9004                         goto out;
9005                 }
9006                 INIT_LIST_HEAD(&orphan->list);
9007                 orphan->root = dback->root;
9008                 orphan->objectid = dback->owner;
9009                 orphan->offset = dback->offset;
9010                 orphan->disk_bytenr = rec->cache.start;
9011                 orphan->disk_len = rec->cache.size;
9012                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9013                 recorded_data_ref = 1;
9014         }
9015 out:
9016         btrfs_release_path(&path);
9017         if (!ret)
9018                 return !recorded_data_ref;
9019         else
9020                 return ret;
9021 }
9022
9023 /*
9024  * when an incorrect extent item is found, this will delete
9025  * all of the existing entries for it and recreate them
9026  * based on what the tree scan found.
9027  */
9028 static int fixup_extent_refs(struct btrfs_fs_info *info,
9029                              struct cache_tree *extent_cache,
9030                              struct extent_record *rec)
9031 {
9032         struct btrfs_trans_handle *trans = NULL;
9033         int ret;
9034         struct btrfs_path path;
9035         struct list_head *cur = rec->backrefs.next;
9036         struct cache_extent *cache;
9037         struct extent_backref *back;
9038         int allocated = 0;
9039         u64 flags = 0;
9040
9041         if (rec->flag_block_full_backref)
9042                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9043
9044         btrfs_init_path(&path);
9045         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9046                 /*
9047                  * Sometimes the backrefs themselves are so broken they don't
9048                  * get attached to any meaningful rec, so first go back and
9049                  * check any of our backrefs that we couldn't find and throw
9050                  * them into the list if we find the backref so that
9051                  * verify_backrefs can figure out what to do.
9052                  */
9053                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9054                 if (ret < 0)
9055                         goto out;
9056         }
9057
9058         /* step one, make sure all of the backrefs agree */
9059         ret = verify_backrefs(info, &path, rec);
9060         if (ret < 0)
9061                 goto out;
9062
9063         trans = btrfs_start_transaction(info->extent_root, 1);
9064         if (IS_ERR(trans)) {
9065                 ret = PTR_ERR(trans);
9066                 goto out;
9067         }
9068
9069         /* step two, delete all the existing records */
9070         ret = delete_extent_records(trans, info->extent_root, &path,
9071                                     rec->start);
9072
9073         if (ret < 0)
9074                 goto out;
9075
9076         /* was this block corrupt?  If so, don't add references to it */
9077         cache = lookup_cache_extent(info->corrupt_blocks,
9078                                     rec->start, rec->max_size);
9079         if (cache) {
9080                 ret = 0;
9081                 goto out;
9082         }
9083
9084         /* step three, recreate all the refs we did find */
9085         while(cur != &rec->backrefs) {
9086                 back = to_extent_backref(cur);
9087                 cur = cur->next;
9088
9089                 /*
9090                  * if we didn't find any references, don't create a
9091                  * new extent record
9092                  */
9093                 if (!back->found_ref)
9094                         continue;
9095
9096                 rec->bad_full_backref = 0;
9097                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9098                 allocated = 1;
9099
9100                 if (ret)
9101                         goto out;
9102         }
9103 out:
9104         if (trans) {
9105                 int err = btrfs_commit_transaction(trans, info->extent_root);
9106                 if (!ret)
9107                         ret = err;
9108         }
9109
9110         if (!ret)
9111                 fprintf(stderr, "Repaired extent references for %llu\n",
9112                                 (unsigned long long)rec->start);
9113
9114         btrfs_release_path(&path);
9115         return ret;
9116 }
9117
9118 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9119                               struct extent_record *rec)
9120 {
9121         struct btrfs_trans_handle *trans;
9122         struct btrfs_root *root = fs_info->extent_root;
9123         struct btrfs_path path;
9124         struct btrfs_extent_item *ei;
9125         struct btrfs_key key;
9126         u64 flags;
9127         int ret = 0;
9128
9129         key.objectid = rec->start;
9130         if (rec->metadata) {
9131                 key.type = BTRFS_METADATA_ITEM_KEY;
9132                 key.offset = rec->info_level;
9133         } else {
9134                 key.type = BTRFS_EXTENT_ITEM_KEY;
9135                 key.offset = rec->max_size;
9136         }
9137
9138         trans = btrfs_start_transaction(root, 0);
9139         if (IS_ERR(trans))
9140                 return PTR_ERR(trans);
9141
9142         btrfs_init_path(&path);
9143         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9144         if (ret < 0) {
9145                 btrfs_release_path(&path);
9146                 btrfs_commit_transaction(trans, root);
9147                 return ret;
9148         } else if (ret) {
9149                 fprintf(stderr, "Didn't find extent for %llu\n",
9150                         (unsigned long long)rec->start);
9151                 btrfs_release_path(&path);
9152                 btrfs_commit_transaction(trans, root);
9153                 return -ENOENT;
9154         }
9155
9156         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9157                             struct btrfs_extent_item);
9158         flags = btrfs_extent_flags(path.nodes[0], ei);
9159         if (rec->flag_block_full_backref) {
9160                 fprintf(stderr, "setting full backref on %llu\n",
9161                         (unsigned long long)key.objectid);
9162                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9163         } else {
9164                 fprintf(stderr, "clearing full backref on %llu\n",
9165                         (unsigned long long)key.objectid);
9166                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9167         }
9168         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9169         btrfs_mark_buffer_dirty(path.nodes[0]);
9170         btrfs_release_path(&path);
9171         ret = btrfs_commit_transaction(trans, root);
9172         if (!ret)
9173                 fprintf(stderr, "Repaired extent flags for %llu\n",
9174                                 (unsigned long long)rec->start);
9175
9176         return ret;
9177 }
9178
9179 /* right now we only prune from the extent allocation tree */
9180 static int prune_one_block(struct btrfs_trans_handle *trans,
9181                            struct btrfs_fs_info *info,
9182                            struct btrfs_corrupt_block *corrupt)
9183 {
9184         int ret;
9185         struct btrfs_path path;
9186         struct extent_buffer *eb;
9187         u64 found;
9188         int slot;
9189         int nritems;
9190         int level = corrupt->level + 1;
9191
9192         btrfs_init_path(&path);
9193 again:
9194         /* we want to stop at the parent to our busted block */
9195         path.lowest_level = level;
9196
9197         ret = btrfs_search_slot(trans, info->extent_root,
9198                                 &corrupt->key, &path, -1, 1);
9199
9200         if (ret < 0)
9201                 goto out;
9202
9203         eb = path.nodes[level];
9204         if (!eb) {
9205                 ret = -ENOENT;
9206                 goto out;
9207         }
9208
9209         /*
9210          * hopefully the search gave us the block we want to prune,
9211          * lets try that first
9212          */
9213         slot = path.slots[level];
9214         found =  btrfs_node_blockptr(eb, slot);
9215         if (found == corrupt->cache.start)
9216                 goto del_ptr;
9217
9218         nritems = btrfs_header_nritems(eb);
9219
9220         /* the search failed, lets scan this node and hope we find it */
9221         for (slot = 0; slot < nritems; slot++) {
9222                 found =  btrfs_node_blockptr(eb, slot);
9223                 if (found == corrupt->cache.start)
9224                         goto del_ptr;
9225         }
9226         /*
9227          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9228          * to this block
9229          */
9230         if (eb == info->extent_root->node) {
9231                 ret = -ENOENT;
9232                 goto out;
9233         } else {
9234                 level++;
9235                 btrfs_release_path(&path);
9236                 goto again;
9237         }
9238
9239 del_ptr:
9240         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9241         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9242
9243 out:
9244         btrfs_release_path(&path);
9245         return ret;
9246 }
9247
9248 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9249 {
9250         struct btrfs_trans_handle *trans = NULL;
9251         struct cache_extent *cache;
9252         struct btrfs_corrupt_block *corrupt;
9253
9254         while (1) {
9255                 cache = search_cache_extent(info->corrupt_blocks, 0);
9256                 if (!cache)
9257                         break;
9258                 if (!trans) {
9259                         trans = btrfs_start_transaction(info->extent_root, 1);
9260                         if (IS_ERR(trans))
9261                                 return PTR_ERR(trans);
9262                 }
9263                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9264                 prune_one_block(trans, info, corrupt);
9265                 remove_cache_extent(info->corrupt_blocks, cache);
9266         }
9267         if (trans)
9268                 return btrfs_commit_transaction(trans, info->extent_root);
9269         return 0;
9270 }
9271
9272 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9273 {
9274         struct btrfs_block_group_cache *cache;
9275         u64 start, end;
9276         int ret;
9277
9278         while (1) {
9279                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9280                                             &start, &end, EXTENT_DIRTY);
9281                 if (ret)
9282                         break;
9283                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9284         }
9285
9286         start = 0;
9287         while (1) {
9288                 cache = btrfs_lookup_first_block_group(fs_info, start);
9289                 if (!cache)
9290                         break;
9291                 if (cache->cached)
9292                         cache->cached = 0;
9293                 start = cache->key.objectid + cache->key.offset;
9294         }
9295 }
9296
9297 static int check_extent_refs(struct btrfs_root *root,
9298                              struct cache_tree *extent_cache)
9299 {
9300         struct extent_record *rec;
9301         struct cache_extent *cache;
9302         int ret = 0;
9303         int had_dups = 0;
9304
9305         if (repair) {
9306                 /*
9307                  * if we're doing a repair, we have to make sure
9308                  * we don't allocate from the problem extents.
9309                  * In the worst case, this will be all the
9310                  * extents in the FS
9311                  */
9312                 cache = search_cache_extent(extent_cache, 0);
9313                 while(cache) {
9314                         rec = container_of(cache, struct extent_record, cache);
9315                         set_extent_dirty(root->fs_info->excluded_extents,
9316                                          rec->start,
9317                                          rec->start + rec->max_size - 1);
9318                         cache = next_cache_extent(cache);
9319                 }
9320
9321                 /* pin down all the corrupted blocks too */
9322                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9323                 while(cache) {
9324                         set_extent_dirty(root->fs_info->excluded_extents,
9325                                          cache->start,
9326                                          cache->start + cache->size - 1);
9327                         cache = next_cache_extent(cache);
9328                 }
9329                 prune_corrupt_blocks(root->fs_info);
9330                 reset_cached_block_groups(root->fs_info);
9331         }
9332
9333         reset_cached_block_groups(root->fs_info);
9334
9335         /*
9336          * We need to delete any duplicate entries we find first otherwise we
9337          * could mess up the extent tree when we have backrefs that actually
9338          * belong to a different extent item and not the weird duplicate one.
9339          */
9340         while (repair && !list_empty(&duplicate_extents)) {
9341                 rec = to_extent_record(duplicate_extents.next);
9342                 list_del_init(&rec->list);
9343
9344                 /* Sometimes we can find a backref before we find an actual
9345                  * extent, so we need to process it a little bit to see if there
9346                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9347                  * if this is a backref screwup.  If we need to delete stuff
9348                  * process_duplicates() will return 0, otherwise it will return
9349                  * 1 and we
9350                  */
9351                 if (process_duplicates(extent_cache, rec))
9352                         continue;
9353                 ret = delete_duplicate_records(root, rec);
9354                 if (ret < 0)
9355                         return ret;
9356                 /*
9357                  * delete_duplicate_records will return the number of entries
9358                  * deleted, so if it's greater than 0 then we know we actually
9359                  * did something and we need to remove.
9360                  */
9361                 if (ret)
9362                         had_dups = 1;
9363         }
9364
9365         if (had_dups)
9366                 return -EAGAIN;
9367
9368         while(1) {
9369                 int cur_err = 0;
9370                 int fix = 0;
9371
9372                 cache = search_cache_extent(extent_cache, 0);
9373                 if (!cache)
9374                         break;
9375                 rec = container_of(cache, struct extent_record, cache);
9376                 if (rec->num_duplicates) {
9377                         fprintf(stderr, "extent item %llu has multiple extent "
9378                                 "items\n", (unsigned long long)rec->start);
9379                         cur_err = 1;
9380                 }
9381
9382                 if (rec->refs != rec->extent_item_refs) {
9383                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9384                                 (unsigned long long)rec->start,
9385                                 (unsigned long long)rec->nr);
9386                         fprintf(stderr, "extent item %llu, found %llu\n",
9387                                 (unsigned long long)rec->extent_item_refs,
9388                                 (unsigned long long)rec->refs);
9389                         ret = record_orphan_data_extents(root->fs_info, rec);
9390                         if (ret < 0)
9391                                 goto repair_abort;
9392                         fix = ret;
9393                         cur_err = 1;
9394                 }
9395                 if (all_backpointers_checked(rec, 1)) {
9396                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9397                                 (unsigned long long)rec->start,
9398                                 (unsigned long long)rec->nr);
9399                         fix = 1;
9400                         cur_err = 1;
9401                 }
9402                 if (!rec->owner_ref_checked) {
9403                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9404                                 (unsigned long long)rec->start,
9405                                 (unsigned long long)rec->nr);
9406                         fix = 1;
9407                         cur_err = 1;
9408                 }
9409
9410                 if (repair && fix) {
9411                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9412                         if (ret)
9413                                 goto repair_abort;
9414                 }
9415
9416
9417                 if (rec->bad_full_backref) {
9418                         fprintf(stderr, "bad full backref, on [%llu]\n",
9419                                 (unsigned long long)rec->start);
9420                         if (repair) {
9421                                 ret = fixup_extent_flags(root->fs_info, rec);
9422                                 if (ret)
9423                                         goto repair_abort;
9424                                 fix = 1;
9425                         }
9426                         cur_err = 1;
9427                 }
9428                 /*
9429                  * Although it's not a extent ref's problem, we reuse this
9430                  * routine for error reporting.
9431                  * No repair function yet.
9432                  */
9433                 if (rec->crossing_stripes) {
9434                         fprintf(stderr,
9435                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9436                                 rec->start, rec->start + rec->max_size);
9437                         cur_err = 1;
9438                 }
9439
9440                 if (rec->wrong_chunk_type) {
9441                         fprintf(stderr,
9442                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9443                                 rec->start, rec->start + rec->max_size);
9444                         cur_err = 1;
9445                 }
9446
9447                 remove_cache_extent(extent_cache, cache);
9448                 free_all_extent_backrefs(rec);
9449                 if (!init_extent_tree && repair && (!cur_err || fix))
9450                         clear_extent_dirty(root->fs_info->excluded_extents,
9451                                            rec->start,
9452                                            rec->start + rec->max_size - 1);
9453                 free(rec);
9454         }
9455 repair_abort:
9456         if (repair) {
9457                 if (ret && ret != -EAGAIN) {
9458                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9459                         exit(1);
9460                 } else if (!ret) {
9461                         struct btrfs_trans_handle *trans;
9462
9463                         root = root->fs_info->extent_root;
9464                         trans = btrfs_start_transaction(root, 1);
9465                         if (IS_ERR(trans)) {
9466                                 ret = PTR_ERR(trans);
9467                                 goto repair_abort;
9468                         }
9469
9470                         btrfs_fix_block_accounting(trans, root);
9471                         ret = btrfs_commit_transaction(trans, root);
9472                         if (ret)
9473                                 goto repair_abort;
9474                 }
9475                 return ret;
9476         }
9477         return 0;
9478 }
9479
9480 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9481 {
9482         u64 stripe_size;
9483
9484         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9485                 stripe_size = length;
9486                 stripe_size /= num_stripes;
9487         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9488                 stripe_size = length * 2;
9489                 stripe_size /= num_stripes;
9490         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9491                 stripe_size = length;
9492                 stripe_size /= (num_stripes - 1);
9493         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9494                 stripe_size = length;
9495                 stripe_size /= (num_stripes - 2);
9496         } else {
9497                 stripe_size = length;
9498         }
9499         return stripe_size;
9500 }
9501
9502 /*
9503  * Check the chunk with its block group/dev list ref:
9504  * Return 0 if all refs seems valid.
9505  * Return 1 if part of refs seems valid, need later check for rebuild ref
9506  * like missing block group and needs to search extent tree to rebuild them.
9507  * Return -1 if essential refs are missing and unable to rebuild.
9508  */
9509 static int check_chunk_refs(struct chunk_record *chunk_rec,
9510                             struct block_group_tree *block_group_cache,
9511                             struct device_extent_tree *dev_extent_cache,
9512                             int silent)
9513 {
9514         struct cache_extent *block_group_item;
9515         struct block_group_record *block_group_rec;
9516         struct cache_extent *dev_extent_item;
9517         struct device_extent_record *dev_extent_rec;
9518         u64 devid;
9519         u64 offset;
9520         u64 length;
9521         int metadump_v2 = 0;
9522         int i;
9523         int ret = 0;
9524
9525         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9526                                                chunk_rec->offset,
9527                                                chunk_rec->length);
9528         if (block_group_item) {
9529                 block_group_rec = container_of(block_group_item,
9530                                                struct block_group_record,
9531                                                cache);
9532                 if (chunk_rec->length != block_group_rec->offset ||
9533                     chunk_rec->offset != block_group_rec->objectid ||
9534                     (!metadump_v2 &&
9535                      chunk_rec->type_flags != block_group_rec->flags)) {
9536                         if (!silent)
9537                                 fprintf(stderr,
9538                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9539                                         chunk_rec->objectid,
9540                                         chunk_rec->type,
9541                                         chunk_rec->offset,
9542                                         chunk_rec->length,
9543                                         chunk_rec->offset,
9544                                         chunk_rec->type_flags,
9545                                         block_group_rec->objectid,
9546                                         block_group_rec->type,
9547                                         block_group_rec->offset,
9548                                         block_group_rec->offset,
9549                                         block_group_rec->objectid,
9550                                         block_group_rec->flags);
9551                         ret = -1;
9552                 } else {
9553                         list_del_init(&block_group_rec->list);
9554                         chunk_rec->bg_rec = block_group_rec;
9555                 }
9556         } else {
9557                 if (!silent)
9558                         fprintf(stderr,
9559                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9560                                 chunk_rec->objectid,
9561                                 chunk_rec->type,
9562                                 chunk_rec->offset,
9563                                 chunk_rec->length,
9564                                 chunk_rec->offset,
9565                                 chunk_rec->type_flags);
9566                 ret = 1;
9567         }
9568
9569         if (metadump_v2)
9570                 return ret;
9571
9572         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9573                                     chunk_rec->num_stripes);
9574         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9575                 devid = chunk_rec->stripes[i].devid;
9576                 offset = chunk_rec->stripes[i].offset;
9577                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9578                                                        devid, offset, length);
9579                 if (dev_extent_item) {
9580                         dev_extent_rec = container_of(dev_extent_item,
9581                                                 struct device_extent_record,
9582                                                 cache);
9583                         if (dev_extent_rec->objectid != devid ||
9584                             dev_extent_rec->offset != offset ||
9585                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9586                             dev_extent_rec->length != length) {
9587                                 if (!silent)
9588                                         fprintf(stderr,
9589                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9590                                                 chunk_rec->objectid,
9591                                                 chunk_rec->type,
9592                                                 chunk_rec->offset,
9593                                                 chunk_rec->stripes[i].devid,
9594                                                 chunk_rec->stripes[i].offset,
9595                                                 dev_extent_rec->objectid,
9596                                                 dev_extent_rec->offset,
9597                                                 dev_extent_rec->length);
9598                                 ret = -1;
9599                         } else {
9600                                 list_move(&dev_extent_rec->chunk_list,
9601                                           &chunk_rec->dextents);
9602                         }
9603                 } else {
9604                         if (!silent)
9605                                 fprintf(stderr,
9606                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9607                                         chunk_rec->objectid,
9608                                         chunk_rec->type,
9609                                         chunk_rec->offset,
9610                                         chunk_rec->stripes[i].devid,
9611                                         chunk_rec->stripes[i].offset);
9612                         ret = -1;
9613                 }
9614         }
9615         return ret;
9616 }
9617
9618 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9619 int check_chunks(struct cache_tree *chunk_cache,
9620                  struct block_group_tree *block_group_cache,
9621                  struct device_extent_tree *dev_extent_cache,
9622                  struct list_head *good, struct list_head *bad,
9623                  struct list_head *rebuild, int silent)
9624 {
9625         struct cache_extent *chunk_item;
9626         struct chunk_record *chunk_rec;
9627         struct block_group_record *bg_rec;
9628         struct device_extent_record *dext_rec;
9629         int err;
9630         int ret = 0;
9631
9632         chunk_item = first_cache_extent(chunk_cache);
9633         while (chunk_item) {
9634                 chunk_rec = container_of(chunk_item, struct chunk_record,
9635                                          cache);
9636                 err = check_chunk_refs(chunk_rec, block_group_cache,
9637                                        dev_extent_cache, silent);
9638                 if (err < 0)
9639                         ret = err;
9640                 if (err == 0 && good)
9641                         list_add_tail(&chunk_rec->list, good);
9642                 if (err > 0 && rebuild)
9643                         list_add_tail(&chunk_rec->list, rebuild);
9644                 if (err < 0 && bad)
9645                         list_add_tail(&chunk_rec->list, bad);
9646                 chunk_item = next_cache_extent(chunk_item);
9647         }
9648
9649         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9650                 if (!silent)
9651                         fprintf(stderr,
9652                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9653                                 bg_rec->objectid,
9654                                 bg_rec->offset,
9655                                 bg_rec->flags);
9656                 if (!ret)
9657                         ret = 1;
9658         }
9659
9660         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9661                             chunk_list) {
9662                 if (!silent)
9663                         fprintf(stderr,
9664                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9665                                 dext_rec->objectid,
9666                                 dext_rec->offset,
9667                                 dext_rec->length);
9668                 if (!ret)
9669                         ret = 1;
9670         }
9671         return ret;
9672 }
9673
9674
9675 static int check_device_used(struct device_record *dev_rec,
9676                              struct device_extent_tree *dext_cache)
9677 {
9678         struct cache_extent *cache;
9679         struct device_extent_record *dev_extent_rec;
9680         u64 total_byte = 0;
9681
9682         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9683         while (cache) {
9684                 dev_extent_rec = container_of(cache,
9685                                               struct device_extent_record,
9686                                               cache);
9687                 if (dev_extent_rec->objectid != dev_rec->devid)
9688                         break;
9689
9690                 list_del_init(&dev_extent_rec->device_list);
9691                 total_byte += dev_extent_rec->length;
9692                 cache = next_cache_extent(cache);
9693         }
9694
9695         if (total_byte != dev_rec->byte_used) {
9696                 fprintf(stderr,
9697                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9698                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9699                         dev_rec->type, dev_rec->offset);
9700                 return -1;
9701         } else {
9702                 return 0;
9703         }
9704 }
9705
9706 /* check btrfs_dev_item -> btrfs_dev_extent */
9707 static int check_devices(struct rb_root *dev_cache,
9708                          struct device_extent_tree *dev_extent_cache)
9709 {
9710         struct rb_node *dev_node;
9711         struct device_record *dev_rec;
9712         struct device_extent_record *dext_rec;
9713         int err;
9714         int ret = 0;
9715
9716         dev_node = rb_first(dev_cache);
9717         while (dev_node) {
9718                 dev_rec = container_of(dev_node, struct device_record, node);
9719                 err = check_device_used(dev_rec, dev_extent_cache);
9720                 if (err)
9721                         ret = err;
9722
9723                 dev_node = rb_next(dev_node);
9724         }
9725         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9726                             device_list) {
9727                 fprintf(stderr,
9728                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9729                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9730                 if (!ret)
9731                         ret = 1;
9732         }
9733         return ret;
9734 }
9735
9736 static int add_root_item_to_list(struct list_head *head,
9737                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9738                                   u8 level, u8 drop_level,
9739                                   struct btrfs_key *drop_key)
9740 {
9741
9742         struct root_item_record *ri_rec;
9743         ri_rec = malloc(sizeof(*ri_rec));
9744         if (!ri_rec)
9745                 return -ENOMEM;
9746         ri_rec->bytenr = bytenr;
9747         ri_rec->objectid = objectid;
9748         ri_rec->level = level;
9749         ri_rec->drop_level = drop_level;
9750         ri_rec->last_snapshot = last_snapshot;
9751         if (drop_key)
9752                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9753         list_add_tail(&ri_rec->list, head);
9754
9755         return 0;
9756 }
9757
9758 static void free_root_item_list(struct list_head *list)
9759 {
9760         struct root_item_record *ri_rec;
9761
9762         while (!list_empty(list)) {
9763                 ri_rec = list_first_entry(list, struct root_item_record,
9764                                           list);
9765                 list_del_init(&ri_rec->list);
9766                 free(ri_rec);
9767         }
9768 }
9769
9770 static int deal_root_from_list(struct list_head *list,
9771                                struct btrfs_root *root,
9772                                struct block_info *bits,
9773                                int bits_nr,
9774                                struct cache_tree *pending,
9775                                struct cache_tree *seen,
9776                                struct cache_tree *reada,
9777                                struct cache_tree *nodes,
9778                                struct cache_tree *extent_cache,
9779                                struct cache_tree *chunk_cache,
9780                                struct rb_root *dev_cache,
9781                                struct block_group_tree *block_group_cache,
9782                                struct device_extent_tree *dev_extent_cache)
9783 {
9784         int ret = 0;
9785         u64 last;
9786
9787         while (!list_empty(list)) {
9788                 struct root_item_record *rec;
9789                 struct extent_buffer *buf;
9790                 rec = list_entry(list->next,
9791                                  struct root_item_record, list);
9792                 last = 0;
9793                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9794                 if (!extent_buffer_uptodate(buf)) {
9795                         free_extent_buffer(buf);
9796                         ret = -EIO;
9797                         break;
9798                 }
9799                 ret = add_root_to_pending(buf, extent_cache, pending,
9800                                     seen, nodes, rec->objectid);
9801                 if (ret < 0)
9802                         break;
9803                 /*
9804                  * To rebuild extent tree, we need deal with snapshot
9805                  * one by one, otherwise we deal with node firstly which
9806                  * can maximize readahead.
9807                  */
9808                 while (1) {
9809                         ret = run_next_block(root, bits, bits_nr, &last,
9810                                              pending, seen, reada, nodes,
9811                                              extent_cache, chunk_cache,
9812                                              dev_cache, block_group_cache,
9813                                              dev_extent_cache, rec);
9814                         if (ret != 0)
9815                                 break;
9816                 }
9817                 free_extent_buffer(buf);
9818                 list_del(&rec->list);
9819                 free(rec);
9820                 if (ret < 0)
9821                         break;
9822         }
9823         while (ret >= 0) {
9824                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9825                                      reada, nodes, extent_cache, chunk_cache,
9826                                      dev_cache, block_group_cache,
9827                                      dev_extent_cache, NULL);
9828                 if (ret != 0) {
9829                         if (ret > 0)
9830                                 ret = 0;
9831                         break;
9832                 }
9833         }
9834         return ret;
9835 }
9836
9837 static int check_chunks_and_extents(struct btrfs_root *root)
9838 {
9839         struct rb_root dev_cache;
9840         struct cache_tree chunk_cache;
9841         struct block_group_tree block_group_cache;
9842         struct device_extent_tree dev_extent_cache;
9843         struct cache_tree extent_cache;
9844         struct cache_tree seen;
9845         struct cache_tree pending;
9846         struct cache_tree reada;
9847         struct cache_tree nodes;
9848         struct extent_io_tree excluded_extents;
9849         struct cache_tree corrupt_blocks;
9850         struct btrfs_path path;
9851         struct btrfs_key key;
9852         struct btrfs_key found_key;
9853         int ret, err = 0;
9854         struct block_info *bits;
9855         int bits_nr;
9856         struct extent_buffer *leaf;
9857         int slot;
9858         struct btrfs_root_item ri;
9859         struct list_head dropping_trees;
9860         struct list_head normal_trees;
9861         struct btrfs_root *root1;
9862         u64 objectid;
9863         u8 level;
9864
9865         dev_cache = RB_ROOT;
9866         cache_tree_init(&chunk_cache);
9867         block_group_tree_init(&block_group_cache);
9868         device_extent_tree_init(&dev_extent_cache);
9869
9870         cache_tree_init(&extent_cache);
9871         cache_tree_init(&seen);
9872         cache_tree_init(&pending);
9873         cache_tree_init(&nodes);
9874         cache_tree_init(&reada);
9875         cache_tree_init(&corrupt_blocks);
9876         extent_io_tree_init(&excluded_extents);
9877         INIT_LIST_HEAD(&dropping_trees);
9878         INIT_LIST_HEAD(&normal_trees);
9879
9880         if (repair) {
9881                 root->fs_info->excluded_extents = &excluded_extents;
9882                 root->fs_info->fsck_extent_cache = &extent_cache;
9883                 root->fs_info->free_extent_hook = free_extent_hook;
9884                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9885         }
9886
9887         bits_nr = 1024;
9888         bits = malloc(bits_nr * sizeof(struct block_info));
9889         if (!bits) {
9890                 perror("malloc");
9891                 exit(1);
9892         }
9893
9894         if (ctx.progress_enabled) {
9895                 ctx.tp = TASK_EXTENTS;
9896                 task_start(ctx.info);
9897         }
9898
9899 again:
9900         root1 = root->fs_info->tree_root;
9901         level = btrfs_header_level(root1->node);
9902         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9903                                     root1->node->start, 0, level, 0, NULL);
9904         if (ret < 0)
9905                 goto out;
9906         root1 = root->fs_info->chunk_root;
9907         level = btrfs_header_level(root1->node);
9908         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9909                                     root1->node->start, 0, level, 0, NULL);
9910         if (ret < 0)
9911                 goto out;
9912         btrfs_init_path(&path);
9913         key.offset = 0;
9914         key.objectid = 0;
9915         key.type = BTRFS_ROOT_ITEM_KEY;
9916         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9917                                         &key, &path, 0, 0);
9918         if (ret < 0)
9919                 goto out;
9920         while(1) {
9921                 leaf = path.nodes[0];
9922                 slot = path.slots[0];
9923                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9924                         ret = btrfs_next_leaf(root, &path);
9925                         if (ret != 0)
9926                                 break;
9927                         leaf = path.nodes[0];
9928                         slot = path.slots[0];
9929                 }
9930                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9931                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9932                         unsigned long offset;
9933                         u64 last_snapshot;
9934
9935                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9936                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9937                         last_snapshot = btrfs_root_last_snapshot(&ri);
9938                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9939                                 level = btrfs_root_level(&ri);
9940                                 ret = add_root_item_to_list(&normal_trees,
9941                                                 found_key.objectid,
9942                                                 btrfs_root_bytenr(&ri),
9943                                                 last_snapshot, level,
9944                                                 0, NULL);
9945                                 if (ret < 0)
9946                                         goto out;
9947                         } else {
9948                                 level = btrfs_root_level(&ri);
9949                                 objectid = found_key.objectid;
9950                                 btrfs_disk_key_to_cpu(&found_key,
9951                                                       &ri.drop_progress);
9952                                 ret = add_root_item_to_list(&dropping_trees,
9953                                                 objectid,
9954                                                 btrfs_root_bytenr(&ri),
9955                                                 last_snapshot, level,
9956                                                 ri.drop_level, &found_key);
9957                                 if (ret < 0)
9958                                         goto out;
9959                         }
9960                 }
9961                 path.slots[0]++;
9962         }
9963         btrfs_release_path(&path);
9964
9965         /*
9966          * check_block can return -EAGAIN if it fixes something, please keep
9967          * this in mind when dealing with return values from these functions, if
9968          * we get -EAGAIN we want to fall through and restart the loop.
9969          */
9970         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9971                                   &seen, &reada, &nodes, &extent_cache,
9972                                   &chunk_cache, &dev_cache, &block_group_cache,
9973                                   &dev_extent_cache);
9974         if (ret < 0) {
9975                 if (ret == -EAGAIN)
9976                         goto loop;
9977                 goto out;
9978         }
9979         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9980                                   &pending, &seen, &reada, &nodes,
9981                                   &extent_cache, &chunk_cache, &dev_cache,
9982                                   &block_group_cache, &dev_extent_cache);
9983         if (ret < 0) {
9984                 if (ret == -EAGAIN)
9985                         goto loop;
9986                 goto out;
9987         }
9988
9989         ret = check_chunks(&chunk_cache, &block_group_cache,
9990                            &dev_extent_cache, NULL, NULL, NULL, 0);
9991         if (ret) {
9992                 if (ret == -EAGAIN)
9993                         goto loop;
9994                 err = ret;
9995         }
9996
9997         ret = check_extent_refs(root, &extent_cache);
9998         if (ret < 0) {
9999                 if (ret == -EAGAIN)
10000                         goto loop;
10001                 goto out;
10002         }
10003
10004         ret = check_devices(&dev_cache, &dev_extent_cache);
10005         if (ret && err)
10006                 ret = err;
10007
10008 out:
10009         task_stop(ctx.info);
10010         if (repair) {
10011                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10012                 extent_io_tree_cleanup(&excluded_extents);
10013                 root->fs_info->fsck_extent_cache = NULL;
10014                 root->fs_info->free_extent_hook = NULL;
10015                 root->fs_info->corrupt_blocks = NULL;
10016                 root->fs_info->excluded_extents = NULL;
10017         }
10018         free(bits);
10019         free_chunk_cache_tree(&chunk_cache);
10020         free_device_cache_tree(&dev_cache);
10021         free_block_group_tree(&block_group_cache);
10022         free_device_extent_tree(&dev_extent_cache);
10023         free_extent_cache_tree(&seen);
10024         free_extent_cache_tree(&pending);
10025         free_extent_cache_tree(&reada);
10026         free_extent_cache_tree(&nodes);
10027         free_root_item_list(&normal_trees);
10028         free_root_item_list(&dropping_trees);
10029         return ret;
10030 loop:
10031         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10032         free_extent_cache_tree(&seen);
10033         free_extent_cache_tree(&pending);
10034         free_extent_cache_tree(&reada);
10035         free_extent_cache_tree(&nodes);
10036         free_chunk_cache_tree(&chunk_cache);
10037         free_block_group_tree(&block_group_cache);
10038         free_device_cache_tree(&dev_cache);
10039         free_device_extent_tree(&dev_extent_cache);
10040         free_extent_record_cache(&extent_cache);
10041         free_root_item_list(&normal_trees);
10042         free_root_item_list(&dropping_trees);
10043         extent_io_tree_cleanup(&excluded_extents);
10044         goto again;
10045 }
10046
10047 /*
10048  * Check backrefs of a tree block given by @bytenr or @eb.
10049  *
10050  * @root:       the root containing the @bytenr or @eb
10051  * @eb:         tree block extent buffer, can be NULL
10052  * @bytenr:     bytenr of the tree block to search
10053  * @level:      tree level of the tree block
10054  * @owner:      owner of the tree block
10055  *
10056  * Return >0 for any error found and output error message
10057  * Return 0 for no error found
10058  */
10059 static int check_tree_block_ref(struct btrfs_root *root,
10060                                 struct extent_buffer *eb, u64 bytenr,
10061                                 int level, u64 owner)
10062 {
10063         struct btrfs_key key;
10064         struct btrfs_root *extent_root = root->fs_info->extent_root;
10065         struct btrfs_path path;
10066         struct btrfs_extent_item *ei;
10067         struct btrfs_extent_inline_ref *iref;
10068         struct extent_buffer *leaf;
10069         unsigned long end;
10070         unsigned long ptr;
10071         int slot;
10072         int skinny_level;
10073         int type;
10074         u32 nodesize = root->fs_info->nodesize;
10075         u32 item_size;
10076         u64 offset;
10077         int tree_reloc_root = 0;
10078         int found_ref = 0;
10079         int err = 0;
10080         int ret;
10081
10082         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10083             btrfs_header_bytenr(root->node) == bytenr)
10084                 tree_reloc_root = 1;
10085
10086         btrfs_init_path(&path);
10087         key.objectid = bytenr;
10088         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10089                 key.type = BTRFS_METADATA_ITEM_KEY;
10090         else
10091                 key.type = BTRFS_EXTENT_ITEM_KEY;
10092         key.offset = (u64)-1;
10093
10094         /* Search for the backref in extent tree */
10095         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10096         if (ret < 0) {
10097                 err |= BACKREF_MISSING;
10098                 goto out;
10099         }
10100         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10101         if (ret) {
10102                 err |= BACKREF_MISSING;
10103                 goto out;
10104         }
10105
10106         leaf = path.nodes[0];
10107         slot = path.slots[0];
10108         btrfs_item_key_to_cpu(leaf, &key, slot);
10109
10110         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10111
10112         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10113                 skinny_level = (int)key.offset;
10114                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10115         } else {
10116                 struct btrfs_tree_block_info *info;
10117
10118                 info = (struct btrfs_tree_block_info *)(ei + 1);
10119                 skinny_level = btrfs_tree_block_level(leaf, info);
10120                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10121         }
10122
10123         if (eb) {
10124                 u64 header_gen;
10125                 u64 extent_gen;
10126
10127                 if (!(btrfs_extent_flags(leaf, ei) &
10128                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10129                         error(
10130                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10131                                 key.objectid, nodesize,
10132                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10133                         err = BACKREF_MISMATCH;
10134                 }
10135                 header_gen = btrfs_header_generation(eb);
10136                 extent_gen = btrfs_extent_generation(leaf, ei);
10137                 if (header_gen != extent_gen) {
10138                         error(
10139         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10140                                 key.objectid, nodesize, header_gen,
10141                                 extent_gen);
10142                         err = BACKREF_MISMATCH;
10143                 }
10144                 if (level != skinny_level) {
10145                         error(
10146                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10147                                 key.objectid, nodesize, level, skinny_level);
10148                         err = BACKREF_MISMATCH;
10149                 }
10150                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10151                         error(
10152                         "extent[%llu %u] is referred by other roots than %llu",
10153                                 key.objectid, nodesize, root->objectid);
10154                         err = BACKREF_MISMATCH;
10155                 }
10156         }
10157
10158         /*
10159          * Iterate the extent/metadata item to find the exact backref
10160          */
10161         item_size = btrfs_item_size_nr(leaf, slot);
10162         ptr = (unsigned long)iref;
10163         end = (unsigned long)ei + item_size;
10164         while (ptr < end) {
10165                 iref = (struct btrfs_extent_inline_ref *)ptr;
10166                 type = btrfs_extent_inline_ref_type(leaf, iref);
10167                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10168
10169                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10170                         (offset == root->objectid || offset == owner)) {
10171                         found_ref = 1;
10172                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10173                         /*
10174                          * Backref of tree reloc root points to itself, no need
10175                          * to check backref any more.
10176                          */
10177                         if (tree_reloc_root)
10178                                 found_ref = 1;
10179                         else
10180                         /* Check if the backref points to valid referencer */
10181                                 found_ref = !check_tree_block_ref(root, NULL,
10182                                                 offset, level + 1, owner);
10183                 }
10184
10185                 if (found_ref)
10186                         break;
10187                 ptr += btrfs_extent_inline_ref_size(type);
10188         }
10189
10190         /*
10191          * Inlined extent item doesn't have what we need, check
10192          * TREE_BLOCK_REF_KEY
10193          */
10194         if (!found_ref) {
10195                 btrfs_release_path(&path);
10196                 key.objectid = bytenr;
10197                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10198                 key.offset = root->objectid;
10199
10200                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10201                 if (!ret)
10202                         found_ref = 1;
10203         }
10204         if (!found_ref)
10205                 err |= BACKREF_MISSING;
10206 out:
10207         btrfs_release_path(&path);
10208         if (eb && (err & BACKREF_MISSING))
10209                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10210                         bytenr, nodesize, owner, level);
10211         return err;
10212 }
10213
10214 /*
10215  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10216  *
10217  * Return >0 any error found and output error message
10218  * Return 0 for no error found
10219  */
10220 static int check_extent_data_item(struct btrfs_root *root,
10221                                   struct extent_buffer *eb, int slot)
10222 {
10223         struct btrfs_file_extent_item *fi;
10224         struct btrfs_path path;
10225         struct btrfs_root *extent_root = root->fs_info->extent_root;
10226         struct btrfs_key fi_key;
10227         struct btrfs_key dbref_key;
10228         struct extent_buffer *leaf;
10229         struct btrfs_extent_item *ei;
10230         struct btrfs_extent_inline_ref *iref;
10231         struct btrfs_extent_data_ref *dref;
10232         u64 owner;
10233         u64 disk_bytenr;
10234         u64 disk_num_bytes;
10235         u64 extent_num_bytes;
10236         u64 extent_flags;
10237         u32 item_size;
10238         unsigned long end;
10239         unsigned long ptr;
10240         int type;
10241         u64 ref_root;
10242         int found_dbackref = 0;
10243         int err = 0;
10244         int ret;
10245
10246         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10247         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10248
10249         /* Nothing to check for hole and inline data extents */
10250         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10251             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10252                 return 0;
10253
10254         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10255         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10256         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10257
10258         /* Check unaligned disk_num_bytes and num_bytes */
10259         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10260                 error(
10261 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10262                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10263                         root->fs_info->sectorsize);
10264                 err |= BYTES_UNALIGNED;
10265         } else {
10266                 data_bytes_allocated += disk_num_bytes;
10267         }
10268         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10269                 error(
10270 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10271                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10272                         root->fs_info->sectorsize);
10273                 err |= BYTES_UNALIGNED;
10274         } else {
10275                 data_bytes_referenced += extent_num_bytes;
10276         }
10277         owner = btrfs_header_owner(eb);
10278
10279         /* Check the extent item of the file extent in extent tree */
10280         btrfs_init_path(&path);
10281         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10282         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10283         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10284
10285         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10286         if (ret)
10287                 goto out;
10288
10289         leaf = path.nodes[0];
10290         slot = path.slots[0];
10291         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10292
10293         extent_flags = btrfs_extent_flags(leaf, ei);
10294
10295         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10296                 error(
10297                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10298                     disk_bytenr, disk_num_bytes,
10299                     BTRFS_EXTENT_FLAG_DATA);
10300                 err |= BACKREF_MISMATCH;
10301         }
10302
10303         /* Check data backref inside that extent item */
10304         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10305         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10306         ptr = (unsigned long)iref;
10307         end = (unsigned long)ei + item_size;
10308         while (ptr < end) {
10309                 iref = (struct btrfs_extent_inline_ref *)ptr;
10310                 type = btrfs_extent_inline_ref_type(leaf, iref);
10311                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10312
10313                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10314                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10315                         if (ref_root == owner || ref_root == root->objectid)
10316                                 found_dbackref = 1;
10317                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10318                         found_dbackref = !check_tree_block_ref(root, NULL,
10319                                 btrfs_extent_inline_ref_offset(leaf, iref),
10320                                 0, owner);
10321                 }
10322
10323                 if (found_dbackref)
10324                         break;
10325                 ptr += btrfs_extent_inline_ref_size(type);
10326         }
10327
10328         if (!found_dbackref) {
10329                 btrfs_release_path(&path);
10330
10331                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10332                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10333                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10334                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10335                                 fi_key.objectid, fi_key.offset);
10336
10337                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10338                                         &dbref_key, &path, 0, 0);
10339                 if (!ret) {
10340                         found_dbackref = 1;
10341                         goto out;
10342                 }
10343
10344                 btrfs_release_path(&path);
10345
10346                 /*
10347                  * Neither inlined nor EXTENT_DATA_REF found, try
10348                  * SHARED_DATA_REF as last chance.
10349                  */
10350                 dbref_key.objectid = disk_bytenr;
10351                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10352                 dbref_key.offset = eb->start;
10353
10354                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10355                                         &dbref_key, &path, 0, 0);
10356                 if (!ret) {
10357                         found_dbackref = 1;
10358                         goto out;
10359                 }
10360         }
10361
10362 out:
10363         if (!found_dbackref)
10364                 err |= BACKREF_MISSING;
10365         btrfs_release_path(&path);
10366         if (err & BACKREF_MISSING) {
10367                 error("data extent[%llu %llu] backref lost",
10368                       disk_bytenr, disk_num_bytes);
10369         }
10370         return err;
10371 }
10372
10373 /*
10374  * Get real tree block level for the case like shared block
10375  * Return >= 0 as tree level
10376  * Return <0 for error
10377  */
10378 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10379 {
10380         struct extent_buffer *eb;
10381         struct btrfs_path path;
10382         struct btrfs_key key;
10383         struct btrfs_extent_item *ei;
10384         u64 flags;
10385         u64 transid;
10386         u8 backref_level;
10387         u8 header_level;
10388         int ret;
10389
10390         /* Search extent tree for extent generation and level */
10391         key.objectid = bytenr;
10392         key.type = BTRFS_METADATA_ITEM_KEY;
10393         key.offset = (u64)-1;
10394
10395         btrfs_init_path(&path);
10396         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10397         if (ret < 0)
10398                 goto release_out;
10399         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10400         if (ret < 0)
10401                 goto release_out;
10402         if (ret > 0) {
10403                 ret = -ENOENT;
10404                 goto release_out;
10405         }
10406
10407         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10408         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10409                             struct btrfs_extent_item);
10410         flags = btrfs_extent_flags(path.nodes[0], ei);
10411         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10412                 ret = -ENOENT;
10413                 goto release_out;
10414         }
10415
10416         /* Get transid for later read_tree_block() check */
10417         transid = btrfs_extent_generation(path.nodes[0], ei);
10418
10419         /* Get backref level as one source */
10420         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10421                 backref_level = key.offset;
10422         } else {
10423                 struct btrfs_tree_block_info *info;
10424
10425                 info = (struct btrfs_tree_block_info *)(ei + 1);
10426                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10427         }
10428         btrfs_release_path(&path);
10429
10430         /* Get level from tree block as an alternative source */
10431         eb = read_tree_block(fs_info, bytenr, transid);
10432         if (!extent_buffer_uptodate(eb)) {
10433                 free_extent_buffer(eb);
10434                 return -EIO;
10435         }
10436         header_level = btrfs_header_level(eb);
10437         free_extent_buffer(eb);
10438
10439         if (header_level != backref_level)
10440                 return -EIO;
10441         return header_level;
10442
10443 release_out:
10444         btrfs_release_path(&path);
10445         return ret;
10446 }
10447
10448 /*
10449  * Check if a tree block backref is valid (points to a valid tree block)
10450  * if level == -1, level will be resolved
10451  * Return >0 for any error found and print error message
10452  */
10453 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10454                                     u64 bytenr, int level)
10455 {
10456         struct btrfs_root *root;
10457         struct btrfs_key key;
10458         struct btrfs_path path;
10459         struct extent_buffer *eb;
10460         struct extent_buffer *node;
10461         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10462         int err = 0;
10463         int ret;
10464
10465         /* Query level for level == -1 special case */
10466         if (level == -1)
10467                 level = query_tree_block_level(fs_info, bytenr);
10468         if (level < 0) {
10469                 err |= REFERENCER_MISSING;
10470                 goto out;
10471         }
10472
10473         key.objectid = root_id;
10474         key.type = BTRFS_ROOT_ITEM_KEY;
10475         key.offset = (u64)-1;
10476
10477         root = btrfs_read_fs_root(fs_info, &key);
10478         if (IS_ERR(root)) {
10479                 err |= REFERENCER_MISSING;
10480                 goto out;
10481         }
10482
10483         /* Read out the tree block to get item/node key */
10484         eb = read_tree_block(fs_info, bytenr, 0);
10485         if (!extent_buffer_uptodate(eb)) {
10486                 err |= REFERENCER_MISSING;
10487                 free_extent_buffer(eb);
10488                 goto out;
10489         }
10490
10491         /* Empty tree, no need to check key */
10492         if (!btrfs_header_nritems(eb) && !level) {
10493                 free_extent_buffer(eb);
10494                 goto out;
10495         }
10496
10497         if (level)
10498                 btrfs_node_key_to_cpu(eb, &key, 0);
10499         else
10500                 btrfs_item_key_to_cpu(eb, &key, 0);
10501
10502         free_extent_buffer(eb);
10503
10504         btrfs_init_path(&path);
10505         path.lowest_level = level;
10506         /* Search with the first key, to ensure we can reach it */
10507         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10508         if (ret < 0) {
10509                 err |= REFERENCER_MISSING;
10510                 goto release_out;
10511         }
10512
10513         node = path.nodes[level];
10514         if (btrfs_header_bytenr(node) != bytenr) {
10515                 error(
10516         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10517                         bytenr, nodesize, bytenr,
10518                         btrfs_header_bytenr(node));
10519                 err |= REFERENCER_MISMATCH;
10520         }
10521         if (btrfs_header_level(node) != level) {
10522                 error(
10523         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10524                         bytenr, nodesize, level,
10525                         btrfs_header_level(node));
10526                 err |= REFERENCER_MISMATCH;
10527         }
10528
10529 release_out:
10530         btrfs_release_path(&path);
10531 out:
10532         if (err & REFERENCER_MISSING) {
10533                 if (level < 0)
10534                         error("extent [%llu %d] lost referencer (owner: %llu)",
10535                                 bytenr, nodesize, root_id);
10536                 else
10537                         error(
10538                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10539                                 bytenr, nodesize, root_id, level);
10540         }
10541
10542         return err;
10543 }
10544
10545 /*
10546  * Check if tree block @eb is tree reloc root.
10547  * Return 0 if it's not or any problem happens
10548  * Return 1 if it's a tree reloc root
10549  */
10550 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10551                                  struct extent_buffer *eb)
10552 {
10553         struct btrfs_root *tree_reloc_root;
10554         struct btrfs_key key;
10555         u64 bytenr = btrfs_header_bytenr(eb);
10556         u64 owner = btrfs_header_owner(eb);
10557         int ret = 0;
10558
10559         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10560         key.offset = owner;
10561         key.type = BTRFS_ROOT_ITEM_KEY;
10562
10563         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10564         if (IS_ERR(tree_reloc_root))
10565                 return 0;
10566
10567         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10568                 ret = 1;
10569         btrfs_free_fs_root(tree_reloc_root);
10570         return ret;
10571 }
10572
10573 /*
10574  * Check referencer for shared block backref
10575  * If level == -1, this function will resolve the level.
10576  */
10577 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10578                                      u64 parent, u64 bytenr, int level)
10579 {
10580         struct extent_buffer *eb;
10581         u32 nr;
10582         int found_parent = 0;
10583         int i;
10584
10585         eb = read_tree_block(fs_info, parent, 0);
10586         if (!extent_buffer_uptodate(eb))
10587                 goto out;
10588
10589         if (level == -1)
10590                 level = query_tree_block_level(fs_info, bytenr);
10591         if (level < 0)
10592                 goto out;
10593
10594         /* It's possible it's a tree reloc root */
10595         if (parent == bytenr) {
10596                 if (is_tree_reloc_root(fs_info, eb))
10597                         found_parent = 1;
10598                 goto out;
10599         }
10600
10601         if (level + 1 != btrfs_header_level(eb))
10602                 goto out;
10603
10604         nr = btrfs_header_nritems(eb);
10605         for (i = 0; i < nr; i++) {
10606                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10607                         found_parent = 1;
10608                         break;
10609                 }
10610         }
10611 out:
10612         free_extent_buffer(eb);
10613         if (!found_parent) {
10614                 error(
10615         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10616                         bytenr, fs_info->nodesize, parent, level);
10617                 return REFERENCER_MISSING;
10618         }
10619         return 0;
10620 }
10621
10622 /*
10623  * Check referencer for normal (inlined) data ref
10624  * If len == 0, it will be resolved by searching in extent tree
10625  */
10626 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10627                                      u64 root_id, u64 objectid, u64 offset,
10628                                      u64 bytenr, u64 len, u32 count)
10629 {
10630         struct btrfs_root *root;
10631         struct btrfs_root *extent_root = fs_info->extent_root;
10632         struct btrfs_key key;
10633         struct btrfs_path path;
10634         struct extent_buffer *leaf;
10635         struct btrfs_file_extent_item *fi;
10636         u32 found_count = 0;
10637         int slot;
10638         int ret = 0;
10639
10640         if (!len) {
10641                 key.objectid = bytenr;
10642                 key.type = BTRFS_EXTENT_ITEM_KEY;
10643                 key.offset = (u64)-1;
10644
10645                 btrfs_init_path(&path);
10646                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10647                 if (ret < 0)
10648                         goto out;
10649                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10650                 if (ret)
10651                         goto out;
10652                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10653                 if (key.objectid != bytenr ||
10654                     key.type != BTRFS_EXTENT_ITEM_KEY)
10655                         goto out;
10656                 len = key.offset;
10657                 btrfs_release_path(&path);
10658         }
10659         key.objectid = root_id;
10660         key.type = BTRFS_ROOT_ITEM_KEY;
10661         key.offset = (u64)-1;
10662         btrfs_init_path(&path);
10663
10664         root = btrfs_read_fs_root(fs_info, &key);
10665         if (IS_ERR(root))
10666                 goto out;
10667
10668         key.objectid = objectid;
10669         key.type = BTRFS_EXTENT_DATA_KEY;
10670         /*
10671          * It can be nasty as data backref offset is
10672          * file offset - file extent offset, which is smaller or
10673          * equal to original backref offset.  The only special case is
10674          * overflow.  So we need to special check and do further search.
10675          */
10676         key.offset = offset & (1ULL << 63) ? 0 : offset;
10677
10678         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10679         if (ret < 0)
10680                 goto out;
10681
10682         /*
10683          * Search afterwards to get correct one
10684          * NOTE: As we must do a comprehensive check on the data backref to
10685          * make sure the dref count also matches, we must iterate all file
10686          * extents for that inode.
10687          */
10688         while (1) {
10689                 leaf = path.nodes[0];
10690                 slot = path.slots[0];
10691
10692                 if (slot >= btrfs_header_nritems(leaf))
10693                         goto next;
10694                 btrfs_item_key_to_cpu(leaf, &key, slot);
10695                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10696                         break;
10697                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10698                 /*
10699                  * Except normal disk bytenr and disk num bytes, we still
10700                  * need to do extra check on dbackref offset as
10701                  * dbackref offset = file_offset - file_extent_offset
10702                  */
10703                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10704                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10705                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10706                     offset)
10707                         found_count++;
10708
10709 next:
10710                 ret = btrfs_next_item(root, &path);
10711                 if (ret)
10712                         break;
10713         }
10714 out:
10715         btrfs_release_path(&path);
10716         if (found_count != count) {
10717                 error(
10718 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10719                         bytenr, len, root_id, objectid, offset, count, found_count);
10720                 return REFERENCER_MISSING;
10721         }
10722         return 0;
10723 }
10724
10725 /*
10726  * Check if the referencer of a shared data backref exists
10727  */
10728 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10729                                      u64 parent, u64 bytenr)
10730 {
10731         struct extent_buffer *eb;
10732         struct btrfs_key key;
10733         struct btrfs_file_extent_item *fi;
10734         u32 nr;
10735         int found_parent = 0;
10736         int i;
10737
10738         eb = read_tree_block(fs_info, parent, 0);
10739         if (!extent_buffer_uptodate(eb))
10740                 goto out;
10741
10742         nr = btrfs_header_nritems(eb);
10743         for (i = 0; i < nr; i++) {
10744                 btrfs_item_key_to_cpu(eb, &key, i);
10745                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10746                         continue;
10747
10748                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10749                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10750                         continue;
10751
10752                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10753                         found_parent = 1;
10754                         break;
10755                 }
10756         }
10757
10758 out:
10759         free_extent_buffer(eb);
10760         if (!found_parent) {
10761                 error("shared extent %llu referencer lost (parent: %llu)",
10762                         bytenr, parent);
10763                 return REFERENCER_MISSING;
10764         }
10765         return 0;
10766 }
10767
10768 /*
10769  * This function will check a given extent item, including its backref and
10770  * itself (like crossing stripe boundary and type)
10771  *
10772  * Since we don't use extent_record anymore, introduce new error bit
10773  */
10774 static int check_extent_item(struct btrfs_fs_info *fs_info,
10775                              struct extent_buffer *eb, int slot)
10776 {
10777         struct btrfs_extent_item *ei;
10778         struct btrfs_extent_inline_ref *iref;
10779         struct btrfs_extent_data_ref *dref;
10780         unsigned long end;
10781         unsigned long ptr;
10782         int type;
10783         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10784         u32 item_size = btrfs_item_size_nr(eb, slot);
10785         u64 flags;
10786         u64 offset;
10787         int metadata = 0;
10788         int level;
10789         struct btrfs_key key;
10790         int ret;
10791         int err = 0;
10792
10793         btrfs_item_key_to_cpu(eb, &key, slot);
10794         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10795                 bytes_used += key.offset;
10796         else
10797                 bytes_used += nodesize;
10798
10799         if (item_size < sizeof(*ei)) {
10800                 /*
10801                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10802                  * old thing when on disk format is still un-determined.
10803                  * No need to care about it anymore
10804                  */
10805                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10806                 return -ENOTTY;
10807         }
10808
10809         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10810         flags = btrfs_extent_flags(eb, ei);
10811
10812         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10813                 metadata = 1;
10814         if (metadata && check_crossing_stripes(global_info, key.objectid,
10815                                                eb->len)) {
10816                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10817                       key.objectid, key.objectid + nodesize);
10818                 err |= CROSSING_STRIPE_BOUNDARY;
10819         }
10820
10821         ptr = (unsigned long)(ei + 1);
10822
10823         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10824                 /* Old EXTENT_ITEM metadata */
10825                 struct btrfs_tree_block_info *info;
10826
10827                 info = (struct btrfs_tree_block_info *)ptr;
10828                 level = btrfs_tree_block_level(eb, info);
10829                 ptr += sizeof(struct btrfs_tree_block_info);
10830         } else {
10831                 /* New METADATA_ITEM */
10832                 level = key.offset;
10833         }
10834         end = (unsigned long)ei + item_size;
10835
10836 next:
10837         /* Reached extent item end normally */
10838         if (ptr == end)
10839                 goto out;
10840
10841         /* Beyond extent item end, wrong item size */
10842         if (ptr > end) {
10843                 err |= ITEM_SIZE_MISMATCH;
10844                 error("extent item at bytenr %llu slot %d has wrong size",
10845                         eb->start, slot);
10846                 goto out;
10847         }
10848
10849         /* Now check every backref in this extent item */
10850         iref = (struct btrfs_extent_inline_ref *)ptr;
10851         type = btrfs_extent_inline_ref_type(eb, iref);
10852         offset = btrfs_extent_inline_ref_offset(eb, iref);
10853         switch (type) {
10854         case BTRFS_TREE_BLOCK_REF_KEY:
10855                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10856                                                level);
10857                 err |= ret;
10858                 break;
10859         case BTRFS_SHARED_BLOCK_REF_KEY:
10860                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10861                                                  level);
10862                 err |= ret;
10863                 break;
10864         case BTRFS_EXTENT_DATA_REF_KEY:
10865                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10866                 ret = check_extent_data_backref(fs_info,
10867                                 btrfs_extent_data_ref_root(eb, dref),
10868                                 btrfs_extent_data_ref_objectid(eb, dref),
10869                                 btrfs_extent_data_ref_offset(eb, dref),
10870                                 key.objectid, key.offset,
10871                                 btrfs_extent_data_ref_count(eb, dref));
10872                 err |= ret;
10873                 break;
10874         case BTRFS_SHARED_DATA_REF_KEY:
10875                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10876                 err |= ret;
10877                 break;
10878         default:
10879                 error("extent[%llu %d %llu] has unknown ref type: %d",
10880                         key.objectid, key.type, key.offset, type);
10881                 err |= UNKNOWN_TYPE;
10882                 goto out;
10883         }
10884
10885         ptr += btrfs_extent_inline_ref_size(type);
10886         goto next;
10887
10888 out:
10889         return err;
10890 }
10891
10892 /*
10893  * Check if a dev extent item is referred correctly by its chunk
10894  */
10895 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10896                                  struct extent_buffer *eb, int slot)
10897 {
10898         struct btrfs_root *chunk_root = fs_info->chunk_root;
10899         struct btrfs_dev_extent *ptr;
10900         struct btrfs_path path;
10901         struct btrfs_key chunk_key;
10902         struct btrfs_key devext_key;
10903         struct btrfs_chunk *chunk;
10904         struct extent_buffer *l;
10905         int num_stripes;
10906         u64 length;
10907         int i;
10908         int found_chunk = 0;
10909         int ret;
10910
10911         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10912         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10913         length = btrfs_dev_extent_length(eb, ptr);
10914
10915         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10916         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10917         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10918
10919         btrfs_init_path(&path);
10920         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10921         if (ret)
10922                 goto out;
10923
10924         l = path.nodes[0];
10925         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10926         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10927                                       chunk_key.offset);
10928         if (ret < 0)
10929                 goto out;
10930
10931         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10932                 goto out;
10933
10934         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10935         for (i = 0; i < num_stripes; i++) {
10936                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10937                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10938
10939                 if (devid == devext_key.objectid &&
10940                     offset == devext_key.offset) {
10941                         found_chunk = 1;
10942                         break;
10943                 }
10944         }
10945 out:
10946         btrfs_release_path(&path);
10947         if (!found_chunk) {
10948                 error(
10949                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10950                         devext_key.objectid, devext_key.offset, length);
10951                 return REFERENCER_MISSING;
10952         }
10953         return 0;
10954 }
10955
10956 /*
10957  * Check if the used space is correct with the dev item
10958  */
10959 static int check_dev_item(struct btrfs_fs_info *fs_info,
10960                           struct extent_buffer *eb, int slot)
10961 {
10962         struct btrfs_root *dev_root = fs_info->dev_root;
10963         struct btrfs_dev_item *dev_item;
10964         struct btrfs_path path;
10965         struct btrfs_key key;
10966         struct btrfs_dev_extent *ptr;
10967         u64 dev_id;
10968         u64 used;
10969         u64 total = 0;
10970         int ret;
10971
10972         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10973         dev_id = btrfs_device_id(eb, dev_item);
10974         used = btrfs_device_bytes_used(eb, dev_item);
10975
10976         key.objectid = dev_id;
10977         key.type = BTRFS_DEV_EXTENT_KEY;
10978         key.offset = 0;
10979
10980         btrfs_init_path(&path);
10981         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10982         if (ret < 0) {
10983                 btrfs_item_key_to_cpu(eb, &key, slot);
10984                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10985                         key.objectid, key.type, key.offset);
10986                 btrfs_release_path(&path);
10987                 return REFERENCER_MISSING;
10988         }
10989
10990         /* Iterate dev_extents to calculate the used space of a device */
10991         while (1) {
10992                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10993                         goto next;
10994
10995                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10996                 if (key.objectid > dev_id)
10997                         break;
10998                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10999                         goto next;
11000
11001                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11002                                      struct btrfs_dev_extent);
11003                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11004 next:
11005                 ret = btrfs_next_item(dev_root, &path);
11006                 if (ret)
11007                         break;
11008         }
11009         btrfs_release_path(&path);
11010
11011         if (used != total) {
11012                 btrfs_item_key_to_cpu(eb, &key, slot);
11013                 error(
11014 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11015                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11016                         BTRFS_DEV_EXTENT_KEY, dev_id);
11017                 return ACCOUNTING_MISMATCH;
11018         }
11019         return 0;
11020 }
11021
11022 /*
11023  * Check a block group item with its referener (chunk) and its used space
11024  * with extent/metadata item
11025  */
11026 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11027                                   struct extent_buffer *eb, int slot)
11028 {
11029         struct btrfs_root *extent_root = fs_info->extent_root;
11030         struct btrfs_root *chunk_root = fs_info->chunk_root;
11031         struct btrfs_block_group_item *bi;
11032         struct btrfs_block_group_item bg_item;
11033         struct btrfs_path path;
11034         struct btrfs_key bg_key;
11035         struct btrfs_key chunk_key;
11036         struct btrfs_key extent_key;
11037         struct btrfs_chunk *chunk;
11038         struct extent_buffer *leaf;
11039         struct btrfs_extent_item *ei;
11040         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11041         u64 flags;
11042         u64 bg_flags;
11043         u64 used;
11044         u64 total = 0;
11045         int ret;
11046         int err = 0;
11047
11048         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11049         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11050         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11051         used = btrfs_block_group_used(&bg_item);
11052         bg_flags = btrfs_block_group_flags(&bg_item);
11053
11054         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11055         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11056         chunk_key.offset = bg_key.objectid;
11057
11058         btrfs_init_path(&path);
11059         /* Search for the referencer chunk */
11060         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11061         if (ret) {
11062                 error(
11063                 "block group[%llu %llu] did not find the related chunk item",
11064                         bg_key.objectid, bg_key.offset);
11065                 err |= REFERENCER_MISSING;
11066         } else {
11067                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11068                                         struct btrfs_chunk);
11069                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11070                                                 bg_key.offset) {
11071                         error(
11072         "block group[%llu %llu] related chunk item length does not match",
11073                                 bg_key.objectid, bg_key.offset);
11074                         err |= REFERENCER_MISMATCH;
11075                 }
11076         }
11077         btrfs_release_path(&path);
11078
11079         /* Search from the block group bytenr */
11080         extent_key.objectid = bg_key.objectid;
11081         extent_key.type = 0;
11082         extent_key.offset = 0;
11083
11084         btrfs_init_path(&path);
11085         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11086         if (ret < 0)
11087                 goto out;
11088
11089         /* Iterate extent tree to account used space */
11090         while (1) {
11091                 leaf = path.nodes[0];
11092
11093                 /* Search slot can point to the last item beyond leaf nritems */
11094                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11095                         goto next;
11096
11097                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11098                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11099                         break;
11100
11101                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11102                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11103                         goto next;
11104                 if (extent_key.objectid < bg_key.objectid)
11105                         goto next;
11106
11107                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11108                         total += nodesize;
11109                 else
11110                         total += extent_key.offset;
11111
11112                 ei = btrfs_item_ptr(leaf, path.slots[0],
11113                                     struct btrfs_extent_item);
11114                 flags = btrfs_extent_flags(leaf, ei);
11115                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11116                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11117                                 error(
11118                         "bad extent[%llu, %llu) type mismatch with chunk",
11119                                         extent_key.objectid,
11120                                         extent_key.objectid + extent_key.offset);
11121                                 err |= CHUNK_TYPE_MISMATCH;
11122                         }
11123                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11124                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11125                                     BTRFS_BLOCK_GROUP_METADATA))) {
11126                                 error(
11127                         "bad extent[%llu, %llu) type mismatch with chunk",
11128                                         extent_key.objectid,
11129                                         extent_key.objectid + nodesize);
11130                                 err |= CHUNK_TYPE_MISMATCH;
11131                         }
11132                 }
11133 next:
11134                 ret = btrfs_next_item(extent_root, &path);
11135                 if (ret)
11136                         break;
11137         }
11138
11139 out:
11140         btrfs_release_path(&path);
11141
11142         if (total != used) {
11143                 error(
11144                 "block group[%llu %llu] used %llu but extent items used %llu",
11145                         bg_key.objectid, bg_key.offset, used, total);
11146                 err |= ACCOUNTING_MISMATCH;
11147         }
11148         return err;
11149 }
11150
11151 /*
11152  * Check a chunk item.
11153  * Including checking all referred dev_extents and block group
11154  */
11155 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11156                             struct extent_buffer *eb, int slot)
11157 {
11158         struct btrfs_root *extent_root = fs_info->extent_root;
11159         struct btrfs_root *dev_root = fs_info->dev_root;
11160         struct btrfs_path path;
11161         struct btrfs_key chunk_key;
11162         struct btrfs_key bg_key;
11163         struct btrfs_key devext_key;
11164         struct btrfs_chunk *chunk;
11165         struct extent_buffer *leaf;
11166         struct btrfs_block_group_item *bi;
11167         struct btrfs_block_group_item bg_item;
11168         struct btrfs_dev_extent *ptr;
11169         u64 length;
11170         u64 chunk_end;
11171         u64 stripe_len;
11172         u64 type;
11173         int num_stripes;
11174         u64 offset;
11175         u64 objectid;
11176         int i;
11177         int ret;
11178         int err = 0;
11179
11180         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11181         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11182         length = btrfs_chunk_length(eb, chunk);
11183         chunk_end = chunk_key.offset + length;
11184         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11185                                       chunk_key.offset);
11186         if (ret < 0) {
11187                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11188                         chunk_end);
11189                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11190                 goto out;
11191         }
11192         type = btrfs_chunk_type(eb, chunk);
11193
11194         bg_key.objectid = chunk_key.offset;
11195         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11196         bg_key.offset = length;
11197
11198         btrfs_init_path(&path);
11199         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11200         if (ret) {
11201                 error(
11202                 "chunk[%llu %llu) did not find the related block group item",
11203                         chunk_key.offset, chunk_end);
11204                 err |= REFERENCER_MISSING;
11205         } else{
11206                 leaf = path.nodes[0];
11207                 bi = btrfs_item_ptr(leaf, path.slots[0],
11208                                     struct btrfs_block_group_item);
11209                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11210                                    sizeof(bg_item));
11211                 if (btrfs_block_group_flags(&bg_item) != type) {
11212                         error(
11213 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11214                                 chunk_key.offset, chunk_end, type,
11215                                 btrfs_block_group_flags(&bg_item));
11216                         err |= REFERENCER_MISSING;
11217                 }
11218         }
11219
11220         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11221         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11222         for (i = 0; i < num_stripes; i++) {
11223                 btrfs_release_path(&path);
11224                 btrfs_init_path(&path);
11225                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11226                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11227                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11228
11229                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11230                                         0, 0);
11231                 if (ret)
11232                         goto not_match_dev;
11233
11234                 leaf = path.nodes[0];
11235                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11236                                      struct btrfs_dev_extent);
11237                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11238                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11239                 if (objectid != chunk_key.objectid ||
11240                     offset != chunk_key.offset ||
11241                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11242                         goto not_match_dev;
11243                 continue;
11244 not_match_dev:
11245                 err |= BACKREF_MISSING;
11246                 error(
11247                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11248                         chunk_key.objectid, chunk_end, i);
11249                 continue;
11250         }
11251         btrfs_release_path(&path);
11252 out:
11253         return err;
11254 }
11255
11256 /*
11257  * Main entry function to check known items and update related accounting info
11258  */
11259 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11260 {
11261         struct btrfs_fs_info *fs_info = root->fs_info;
11262         struct btrfs_key key;
11263         int slot = 0;
11264         int type;
11265         struct btrfs_extent_data_ref *dref;
11266         int ret;
11267         int err = 0;
11268
11269 next:
11270         btrfs_item_key_to_cpu(eb, &key, slot);
11271         type = key.type;
11272
11273         switch (type) {
11274         case BTRFS_EXTENT_DATA_KEY:
11275                 ret = check_extent_data_item(root, eb, slot);
11276                 err |= ret;
11277                 break;
11278         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11279                 ret = check_block_group_item(fs_info, eb, slot);
11280                 err |= ret;
11281                 break;
11282         case BTRFS_DEV_ITEM_KEY:
11283                 ret = check_dev_item(fs_info, eb, slot);
11284                 err |= ret;
11285                 break;
11286         case BTRFS_CHUNK_ITEM_KEY:
11287                 ret = check_chunk_item(fs_info, eb, slot);
11288                 err |= ret;
11289                 break;
11290         case BTRFS_DEV_EXTENT_KEY:
11291                 ret = check_dev_extent_item(fs_info, eb, slot);
11292                 err |= ret;
11293                 break;
11294         case BTRFS_EXTENT_ITEM_KEY:
11295         case BTRFS_METADATA_ITEM_KEY:
11296                 ret = check_extent_item(fs_info, eb, slot);
11297                 err |= ret;
11298                 break;
11299         case BTRFS_EXTENT_CSUM_KEY:
11300                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11301                 break;
11302         case BTRFS_TREE_BLOCK_REF_KEY:
11303                 ret = check_tree_block_backref(fs_info, key.offset,
11304                                                key.objectid, -1);
11305                 err |= ret;
11306                 break;
11307         case BTRFS_EXTENT_DATA_REF_KEY:
11308                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11309                 ret = check_extent_data_backref(fs_info,
11310                                 btrfs_extent_data_ref_root(eb, dref),
11311                                 btrfs_extent_data_ref_objectid(eb, dref),
11312                                 btrfs_extent_data_ref_offset(eb, dref),
11313                                 key.objectid, 0,
11314                                 btrfs_extent_data_ref_count(eb, dref));
11315                 err |= ret;
11316                 break;
11317         case BTRFS_SHARED_BLOCK_REF_KEY:
11318                 ret = check_shared_block_backref(fs_info, key.offset,
11319                                                  key.objectid, -1);
11320                 err |= ret;
11321                 break;
11322         case BTRFS_SHARED_DATA_REF_KEY:
11323                 ret = check_shared_data_backref(fs_info, key.offset,
11324                                                 key.objectid);
11325                 err |= ret;
11326                 break;
11327         default:
11328                 break;
11329         }
11330
11331         if (++slot < btrfs_header_nritems(eb))
11332                 goto next;
11333
11334         return err;
11335 }
11336
11337 /*
11338  * Helper function for later fs/subvol tree check.  To determine if a tree
11339  * block should be checked.
11340  * This function will ensure only the direct referencer with lowest rootid to
11341  * check a fs/subvolume tree block.
11342  *
11343  * Backref check at extent tree would detect errors like missing subvolume
11344  * tree, so we can do aggressive check to reduce duplicated checks.
11345  */
11346 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11347 {
11348         struct btrfs_root *extent_root = root->fs_info->extent_root;
11349         struct btrfs_key key;
11350         struct btrfs_path path;
11351         struct extent_buffer *leaf;
11352         int slot;
11353         struct btrfs_extent_item *ei;
11354         unsigned long ptr;
11355         unsigned long end;
11356         int type;
11357         u32 item_size;
11358         u64 offset;
11359         struct btrfs_extent_inline_ref *iref;
11360         int ret;
11361
11362         btrfs_init_path(&path);
11363         key.objectid = btrfs_header_bytenr(eb);
11364         key.type = BTRFS_METADATA_ITEM_KEY;
11365         key.offset = (u64)-1;
11366
11367         /*
11368          * Any failure in backref resolving means we can't determine
11369          * whom the tree block belongs to.
11370          * So in that case, we need to check that tree block
11371          */
11372         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11373         if (ret < 0)
11374                 goto need_check;
11375
11376         ret = btrfs_previous_extent_item(extent_root, &path,
11377                                          btrfs_header_bytenr(eb));
11378         if (ret)
11379                 goto need_check;
11380
11381         leaf = path.nodes[0];
11382         slot = path.slots[0];
11383         btrfs_item_key_to_cpu(leaf, &key, slot);
11384         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11385
11386         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11387                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11388         } else {
11389                 struct btrfs_tree_block_info *info;
11390
11391                 info = (struct btrfs_tree_block_info *)(ei + 1);
11392                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11393         }
11394
11395         item_size = btrfs_item_size_nr(leaf, slot);
11396         ptr = (unsigned long)iref;
11397         end = (unsigned long)ei + item_size;
11398         while (ptr < end) {
11399                 iref = (struct btrfs_extent_inline_ref *)ptr;
11400                 type = btrfs_extent_inline_ref_type(leaf, iref);
11401                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11402
11403                 /*
11404                  * We only check the tree block if current root is
11405                  * the lowest referencer of it.
11406                  */
11407                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11408                     offset < root->objectid) {
11409                         btrfs_release_path(&path);
11410                         return 0;
11411                 }
11412
11413                 ptr += btrfs_extent_inline_ref_size(type);
11414         }
11415         /*
11416          * Normally we should also check keyed tree block ref, but that may be
11417          * very time consuming.  Inlined ref should already make us skip a lot
11418          * of refs now.  So skip search keyed tree block ref.
11419          */
11420
11421 need_check:
11422         btrfs_release_path(&path);
11423         return 1;
11424 }
11425
11426 /*
11427  * Traversal function for tree block. We will do:
11428  * 1) Skip shared fs/subvolume tree blocks
11429  * 2) Update related bytes accounting
11430  * 3) Pre-order traversal
11431  */
11432 static int traverse_tree_block(struct btrfs_root *root,
11433                                 struct extent_buffer *node)
11434 {
11435         struct extent_buffer *eb;
11436         struct btrfs_key key;
11437         struct btrfs_key drop_key;
11438         int level;
11439         u64 nr;
11440         int i;
11441         int err = 0;
11442         int ret;
11443
11444         /*
11445          * Skip shared fs/subvolume tree block, in that case they will
11446          * be checked by referencer with lowest rootid
11447          */
11448         if (is_fstree(root->objectid) && !should_check(root, node))
11449                 return 0;
11450
11451         /* Update bytes accounting */
11452         total_btree_bytes += node->len;
11453         if (fs_root_objectid(btrfs_header_owner(node)))
11454                 total_fs_tree_bytes += node->len;
11455         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11456                 total_extent_tree_bytes += node->len;
11457         if (!found_old_backref &&
11458             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11459             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11460             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11461                 found_old_backref = 1;
11462
11463         /* pre-order tranversal, check itself first */
11464         level = btrfs_header_level(node);
11465         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11466                                    btrfs_header_level(node),
11467                                    btrfs_header_owner(node));
11468         err |= ret;
11469         if (err)
11470                 error(
11471         "check %s failed root %llu bytenr %llu level %d, force continue check",
11472                         level ? "node":"leaf", root->objectid,
11473                         btrfs_header_bytenr(node), btrfs_header_level(node));
11474
11475         if (!level) {
11476                 btree_space_waste += btrfs_leaf_free_space(root, node);
11477                 ret = check_leaf_items(root, node);
11478                 err |= ret;
11479                 return err;
11480         }
11481
11482         nr = btrfs_header_nritems(node);
11483         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11484         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11485                 sizeof(struct btrfs_key_ptr);
11486
11487         /* Then check all its children */
11488         for (i = 0; i < nr; i++) {
11489                 u64 blocknr = btrfs_node_blockptr(node, i);
11490
11491                 btrfs_node_key_to_cpu(node, &key, i);
11492                 if (level == root->root_item.drop_level &&
11493                     is_dropped_key(&key, &drop_key))
11494                         continue;
11495
11496                 /*
11497                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11498                  * to call the function itself.
11499                  */
11500                 eb = read_tree_block(root->fs_info, blocknr, 0);
11501                 if (extent_buffer_uptodate(eb)) {
11502                         ret = traverse_tree_block(root, eb);
11503                         err |= ret;
11504                 }
11505                 free_extent_buffer(eb);
11506         }
11507
11508         return err;
11509 }
11510
11511 /*
11512  * Low memory usage version check_chunks_and_extents.
11513  */
11514 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11515 {
11516         struct btrfs_path path;
11517         struct btrfs_key key;
11518         struct btrfs_root *root1;
11519         struct btrfs_root *cur_root;
11520         int err = 0;
11521         int ret;
11522
11523         root1 = root->fs_info->chunk_root;
11524         ret = traverse_tree_block(root1, root1->node);
11525         err |= ret;
11526
11527         root1 = root->fs_info->tree_root;
11528         ret = traverse_tree_block(root1, root1->node);
11529         err |= ret;
11530
11531         btrfs_init_path(&path);
11532         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11533         key.offset = 0;
11534         key.type = BTRFS_ROOT_ITEM_KEY;
11535
11536         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11537         if (ret) {
11538                 error("cannot find extent treet in tree_root");
11539                 goto out;
11540         }
11541
11542         while (1) {
11543                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11544                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11545                         goto next;
11546                 key.offset = (u64)-1;
11547
11548                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11549                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11550                                         &key);
11551                 else
11552                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11553                 if (IS_ERR(cur_root) || !cur_root) {
11554                         error("failed to read tree: %lld", key.objectid);
11555                         goto next;
11556                 }
11557
11558                 ret = traverse_tree_block(cur_root, cur_root->node);
11559                 err |= ret;
11560
11561                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11562                         btrfs_free_fs_root(cur_root);
11563 next:
11564                 ret = btrfs_next_item(root1, &path);
11565                 if (ret)
11566                         goto out;
11567         }
11568
11569 out:
11570         btrfs_release_path(&path);
11571         return err;
11572 }
11573
11574 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11575                            struct btrfs_root *root, int overwrite)
11576 {
11577         struct extent_buffer *c;
11578         struct extent_buffer *old = root->node;
11579         int level;
11580         int ret;
11581         struct btrfs_disk_key disk_key = {0,0,0};
11582
11583         level = 0;
11584
11585         if (overwrite) {
11586                 c = old;
11587                 extent_buffer_get(c);
11588                 goto init;
11589         }
11590         c = btrfs_alloc_free_block(trans, root,
11591                                    root->fs_info->nodesize,
11592                                    root->root_key.objectid,
11593                                    &disk_key, level, 0, 0);
11594         if (IS_ERR(c)) {
11595                 c = old;
11596                 extent_buffer_get(c);
11597                 overwrite = 1;
11598         }
11599 init:
11600         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11601         btrfs_set_header_level(c, level);
11602         btrfs_set_header_bytenr(c, c->start);
11603         btrfs_set_header_generation(c, trans->transid);
11604         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11605         btrfs_set_header_owner(c, root->root_key.objectid);
11606
11607         write_extent_buffer(c, root->fs_info->fsid,
11608                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11609
11610         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11611                             btrfs_header_chunk_tree_uuid(c),
11612                             BTRFS_UUID_SIZE);
11613
11614         btrfs_mark_buffer_dirty(c);
11615         /*
11616          * this case can happen in the following case:
11617          *
11618          * 1.overwrite previous root.
11619          *
11620          * 2.reinit reloc data root, this is because we skip pin
11621          * down reloc data tree before which means we can allocate
11622          * same block bytenr here.
11623          */
11624         if (old->start == c->start) {
11625                 btrfs_set_root_generation(&root->root_item,
11626                                           trans->transid);
11627                 root->root_item.level = btrfs_header_level(root->node);
11628                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11629                                         &root->root_key, &root->root_item);
11630                 if (ret) {
11631                         free_extent_buffer(c);
11632                         return ret;
11633                 }
11634         }
11635         free_extent_buffer(old);
11636         root->node = c;
11637         add_root_to_dirty_list(root);
11638         return 0;
11639 }
11640
11641 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11642                                 struct extent_buffer *eb, int tree_root)
11643 {
11644         struct extent_buffer *tmp;
11645         struct btrfs_root_item *ri;
11646         struct btrfs_key key;
11647         u64 bytenr;
11648         int level = btrfs_header_level(eb);
11649         int nritems;
11650         int ret;
11651         int i;
11652
11653         /*
11654          * If we have pinned this block before, don't pin it again.
11655          * This can not only avoid forever loop with broken filesystem
11656          * but also give us some speedups.
11657          */
11658         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11659                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11660                 return 0;
11661
11662         btrfs_pin_extent(fs_info, eb->start, eb->len);
11663
11664         nritems = btrfs_header_nritems(eb);
11665         for (i = 0; i < nritems; i++) {
11666                 if (level == 0) {
11667                         btrfs_item_key_to_cpu(eb, &key, i);
11668                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11669                                 continue;
11670                         /* Skip the extent root and reloc roots */
11671                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11672                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11673                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11674                                 continue;
11675                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11676                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11677
11678                         /*
11679                          * If at any point we start needing the real root we
11680                          * will have to build a stump root for the root we are
11681                          * in, but for now this doesn't actually use the root so
11682                          * just pass in extent_root.
11683                          */
11684                         tmp = read_tree_block(fs_info, bytenr, 0);
11685                         if (!extent_buffer_uptodate(tmp)) {
11686                                 fprintf(stderr, "Error reading root block\n");
11687                                 return -EIO;
11688                         }
11689                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11690                         free_extent_buffer(tmp);
11691                         if (ret)
11692                                 return ret;
11693                 } else {
11694                         bytenr = btrfs_node_blockptr(eb, i);
11695
11696                         /* If we aren't the tree root don't read the block */
11697                         if (level == 1 && !tree_root) {
11698                                 btrfs_pin_extent(fs_info, bytenr,
11699                                                 fs_info->nodesize);
11700                                 continue;
11701                         }
11702
11703                         tmp = read_tree_block(fs_info, bytenr, 0);
11704                         if (!extent_buffer_uptodate(tmp)) {
11705                                 fprintf(stderr, "Error reading tree block\n");
11706                                 return -EIO;
11707                         }
11708                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11709                         free_extent_buffer(tmp);
11710                         if (ret)
11711                                 return ret;
11712                 }
11713         }
11714
11715         return 0;
11716 }
11717
11718 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11719 {
11720         int ret;
11721
11722         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11723         if (ret)
11724                 return ret;
11725
11726         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11727 }
11728
11729 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11730 {
11731         struct btrfs_block_group_cache *cache;
11732         struct btrfs_path path;
11733         struct extent_buffer *leaf;
11734         struct btrfs_chunk *chunk;
11735         struct btrfs_key key;
11736         int ret;
11737         u64 start;
11738
11739         btrfs_init_path(&path);
11740         key.objectid = 0;
11741         key.type = BTRFS_CHUNK_ITEM_KEY;
11742         key.offset = 0;
11743         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11744         if (ret < 0) {
11745                 btrfs_release_path(&path);
11746                 return ret;
11747         }
11748
11749         /*
11750          * We do this in case the block groups were screwed up and had alloc
11751          * bits that aren't actually set on the chunks.  This happens with
11752          * restored images every time and could happen in real life I guess.
11753          */
11754         fs_info->avail_data_alloc_bits = 0;
11755         fs_info->avail_metadata_alloc_bits = 0;
11756         fs_info->avail_system_alloc_bits = 0;
11757
11758         /* First we need to create the in-memory block groups */
11759         while (1) {
11760                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11761                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11762                         if (ret < 0) {
11763                                 btrfs_release_path(&path);
11764                                 return ret;
11765                         }
11766                         if (ret) {
11767                                 ret = 0;
11768                                 break;
11769                         }
11770                 }
11771                 leaf = path.nodes[0];
11772                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11773                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11774                         path.slots[0]++;
11775                         continue;
11776                 }
11777
11778                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11779                 btrfs_add_block_group(fs_info, 0,
11780                                       btrfs_chunk_type(leaf, chunk),
11781                                       key.objectid, key.offset,
11782                                       btrfs_chunk_length(leaf, chunk));
11783                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11784                                  key.offset + btrfs_chunk_length(leaf, chunk));
11785                 path.slots[0]++;
11786         }
11787         start = 0;
11788         while (1) {
11789                 cache = btrfs_lookup_first_block_group(fs_info, start);
11790                 if (!cache)
11791                         break;
11792                 cache->cached = 1;
11793                 start = cache->key.objectid + cache->key.offset;
11794         }
11795
11796         btrfs_release_path(&path);
11797         return 0;
11798 }
11799
11800 static int reset_balance(struct btrfs_trans_handle *trans,
11801                          struct btrfs_fs_info *fs_info)
11802 {
11803         struct btrfs_root *root = fs_info->tree_root;
11804         struct btrfs_path path;
11805         struct extent_buffer *leaf;
11806         struct btrfs_key key;
11807         int del_slot, del_nr = 0;
11808         int ret;
11809         int found = 0;
11810
11811         btrfs_init_path(&path);
11812         key.objectid = BTRFS_BALANCE_OBJECTID;
11813         key.type = BTRFS_BALANCE_ITEM_KEY;
11814         key.offset = 0;
11815         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11816         if (ret) {
11817                 if (ret > 0)
11818                         ret = 0;
11819                 if (!ret)
11820                         goto reinit_data_reloc;
11821                 else
11822                         goto out;
11823         }
11824
11825         ret = btrfs_del_item(trans, root, &path);
11826         if (ret)
11827                 goto out;
11828         btrfs_release_path(&path);
11829
11830         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11831         key.type = BTRFS_ROOT_ITEM_KEY;
11832         key.offset = 0;
11833         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11834         if (ret < 0)
11835                 goto out;
11836         while (1) {
11837                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11838                         if (!found)
11839                                 break;
11840
11841                         if (del_nr) {
11842                                 ret = btrfs_del_items(trans, root, &path,
11843                                                       del_slot, del_nr);
11844                                 del_nr = 0;
11845                                 if (ret)
11846                                         goto out;
11847                         }
11848                         key.offset++;
11849                         btrfs_release_path(&path);
11850
11851                         found = 0;
11852                         ret = btrfs_search_slot(trans, root, &key, &path,
11853                                                 -1, 1);
11854                         if (ret < 0)
11855                                 goto out;
11856                         continue;
11857                 }
11858                 found = 1;
11859                 leaf = path.nodes[0];
11860                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11861                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11862                         break;
11863                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11864                         path.slots[0]++;
11865                         continue;
11866                 }
11867                 if (!del_nr) {
11868                         del_slot = path.slots[0];
11869                         del_nr = 1;
11870                 } else {
11871                         del_nr++;
11872                 }
11873                 path.slots[0]++;
11874         }
11875
11876         if (del_nr) {
11877                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11878                 if (ret)
11879                         goto out;
11880         }
11881         btrfs_release_path(&path);
11882
11883 reinit_data_reloc:
11884         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11885         key.type = BTRFS_ROOT_ITEM_KEY;
11886         key.offset = (u64)-1;
11887         root = btrfs_read_fs_root(fs_info, &key);
11888         if (IS_ERR(root)) {
11889                 fprintf(stderr, "Error reading data reloc tree\n");
11890                 ret = PTR_ERR(root);
11891                 goto out;
11892         }
11893         record_root_in_trans(trans, root);
11894         ret = btrfs_fsck_reinit_root(trans, root, 0);
11895         if (ret)
11896                 goto out;
11897         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11898 out:
11899         btrfs_release_path(&path);
11900         return ret;
11901 }
11902
11903 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11904                               struct btrfs_fs_info *fs_info)
11905 {
11906         u64 start = 0;
11907         int ret;
11908
11909         /*
11910          * The only reason we don't do this is because right now we're just
11911          * walking the trees we find and pinning down their bytes, we don't look
11912          * at any of the leaves.  In order to do mixed groups we'd have to check
11913          * the leaves of any fs roots and pin down the bytes for any file
11914          * extents we find.  Not hard but why do it if we don't have to?
11915          */
11916         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11917                 fprintf(stderr, "We don't support re-initing the extent tree "
11918                         "for mixed block groups yet, please notify a btrfs "
11919                         "developer you want to do this so they can add this "
11920                         "functionality.\n");
11921                 return -EINVAL;
11922         }
11923
11924         /*
11925          * first we need to walk all of the trees except the extent tree and pin
11926          * down the bytes that are in use so we don't overwrite any existing
11927          * metadata.
11928          */
11929         ret = pin_metadata_blocks(fs_info);
11930         if (ret) {
11931                 fprintf(stderr, "error pinning down used bytes\n");
11932                 return ret;
11933         }
11934
11935         /*
11936          * Need to drop all the block groups since we're going to recreate all
11937          * of them again.
11938          */
11939         btrfs_free_block_groups(fs_info);
11940         ret = reset_block_groups(fs_info);
11941         if (ret) {
11942                 fprintf(stderr, "error resetting the block groups\n");
11943                 return ret;
11944         }
11945
11946         /* Ok we can allocate now, reinit the extent root */
11947         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11948         if (ret) {
11949                 fprintf(stderr, "extent root initialization failed\n");
11950                 /*
11951                  * When the transaction code is updated we should end the
11952                  * transaction, but for now progs only knows about commit so
11953                  * just return an error.
11954                  */
11955                 return ret;
11956         }
11957
11958         /*
11959          * Now we have all the in-memory block groups setup so we can make
11960          * allocations properly, and the metadata we care about is safe since we
11961          * pinned all of it above.
11962          */
11963         while (1) {
11964                 struct btrfs_block_group_cache *cache;
11965
11966                 cache = btrfs_lookup_first_block_group(fs_info, start);
11967                 if (!cache)
11968                         break;
11969                 start = cache->key.objectid + cache->key.offset;
11970                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11971                                         &cache->key, &cache->item,
11972                                         sizeof(cache->item));
11973                 if (ret) {
11974                         fprintf(stderr, "Error adding block group\n");
11975                         return ret;
11976                 }
11977                 btrfs_extent_post_op(trans, fs_info->extent_root);
11978         }
11979
11980         ret = reset_balance(trans, fs_info);
11981         if (ret)
11982                 fprintf(stderr, "error resetting the pending balance\n");
11983
11984         return ret;
11985 }
11986
11987 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11988 {
11989         struct btrfs_path path;
11990         struct btrfs_trans_handle *trans;
11991         struct btrfs_key key;
11992         int ret;
11993
11994         printf("Recowing metadata block %llu\n", eb->start);
11995         key.objectid = btrfs_header_owner(eb);
11996         key.type = BTRFS_ROOT_ITEM_KEY;
11997         key.offset = (u64)-1;
11998
11999         root = btrfs_read_fs_root(root->fs_info, &key);
12000         if (IS_ERR(root)) {
12001                 fprintf(stderr, "Couldn't find owner root %llu\n",
12002                         key.objectid);
12003                 return PTR_ERR(root);
12004         }
12005
12006         trans = btrfs_start_transaction(root, 1);
12007         if (IS_ERR(trans))
12008                 return PTR_ERR(trans);
12009
12010         btrfs_init_path(&path);
12011         path.lowest_level = btrfs_header_level(eb);
12012         if (path.lowest_level)
12013                 btrfs_node_key_to_cpu(eb, &key, 0);
12014         else
12015                 btrfs_item_key_to_cpu(eb, &key, 0);
12016
12017         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12018         btrfs_commit_transaction(trans, root);
12019         btrfs_release_path(&path);
12020         return ret;
12021 }
12022
12023 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12024 {
12025         struct btrfs_path path;
12026         struct btrfs_trans_handle *trans;
12027         struct btrfs_key key;
12028         int ret;
12029
12030         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12031                bad->key.type, bad->key.offset);
12032         key.objectid = bad->root_id;
12033         key.type = BTRFS_ROOT_ITEM_KEY;
12034         key.offset = (u64)-1;
12035
12036         root = btrfs_read_fs_root(root->fs_info, &key);
12037         if (IS_ERR(root)) {
12038                 fprintf(stderr, "Couldn't find owner root %llu\n",
12039                         key.objectid);
12040                 return PTR_ERR(root);
12041         }
12042
12043         trans = btrfs_start_transaction(root, 1);
12044         if (IS_ERR(trans))
12045                 return PTR_ERR(trans);
12046
12047         btrfs_init_path(&path);
12048         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12049         if (ret) {
12050                 if (ret > 0)
12051                         ret = 0;
12052                 goto out;
12053         }
12054         ret = btrfs_del_item(trans, root, &path);
12055 out:
12056         btrfs_commit_transaction(trans, root);
12057         btrfs_release_path(&path);
12058         return ret;
12059 }
12060
12061 static int zero_log_tree(struct btrfs_root *root)
12062 {
12063         struct btrfs_trans_handle *trans;
12064         int ret;
12065
12066         trans = btrfs_start_transaction(root, 1);
12067         if (IS_ERR(trans)) {
12068                 ret = PTR_ERR(trans);
12069                 return ret;
12070         }
12071         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12072         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12073         ret = btrfs_commit_transaction(trans, root);
12074         return ret;
12075 }
12076
12077 static int populate_csum(struct btrfs_trans_handle *trans,
12078                          struct btrfs_root *csum_root, char *buf, u64 start,
12079                          u64 len)
12080 {
12081         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12082         u64 offset = 0;
12083         u64 sectorsize;
12084         int ret = 0;
12085
12086         while (offset < len) {
12087                 sectorsize = fs_info->sectorsize;
12088                 ret = read_extent_data(fs_info, buf, start + offset,
12089                                        &sectorsize, 0);
12090                 if (ret)
12091                         break;
12092                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12093                                             start + offset, buf, sectorsize);
12094                 if (ret)
12095                         break;
12096                 offset += sectorsize;
12097         }
12098         return ret;
12099 }
12100
12101 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12102                                       struct btrfs_root *csum_root,
12103                                       struct btrfs_root *cur_root)
12104 {
12105         struct btrfs_path path;
12106         struct btrfs_key key;
12107         struct extent_buffer *node;
12108         struct btrfs_file_extent_item *fi;
12109         char *buf = NULL;
12110         u64 start = 0;
12111         u64 len = 0;
12112         int slot = 0;
12113         int ret = 0;
12114
12115         buf = malloc(cur_root->fs_info->sectorsize);
12116         if (!buf)
12117                 return -ENOMEM;
12118
12119         btrfs_init_path(&path);
12120         key.objectid = 0;
12121         key.offset = 0;
12122         key.type = 0;
12123         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12124         if (ret < 0)
12125                 goto out;
12126         /* Iterate all regular file extents and fill its csum */
12127         while (1) {
12128                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12129
12130                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12131                         goto next;
12132                 node = path.nodes[0];
12133                 slot = path.slots[0];
12134                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12135                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12136                         goto next;
12137                 start = btrfs_file_extent_disk_bytenr(node, fi);
12138                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12139
12140                 ret = populate_csum(trans, csum_root, buf, start, len);
12141                 if (ret == -EEXIST)
12142                         ret = 0;
12143                 if (ret < 0)
12144                         goto out;
12145 next:
12146                 /*
12147                  * TODO: if next leaf is corrupted, jump to nearest next valid
12148                  * leaf.
12149                  */
12150                 ret = btrfs_next_item(cur_root, &path);
12151                 if (ret < 0)
12152                         goto out;
12153                 if (ret > 0) {
12154                         ret = 0;
12155                         goto out;
12156                 }
12157         }
12158
12159 out:
12160         btrfs_release_path(&path);
12161         free(buf);
12162         return ret;
12163 }
12164
12165 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12166                                   struct btrfs_root *csum_root)
12167 {
12168         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12169         struct btrfs_path path;
12170         struct btrfs_root *tree_root = fs_info->tree_root;
12171         struct btrfs_root *cur_root;
12172         struct extent_buffer *node;
12173         struct btrfs_key key;
12174         int slot = 0;
12175         int ret = 0;
12176
12177         btrfs_init_path(&path);
12178         key.objectid = BTRFS_FS_TREE_OBJECTID;
12179         key.offset = 0;
12180         key.type = BTRFS_ROOT_ITEM_KEY;
12181         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12182         if (ret < 0)
12183                 goto out;
12184         if (ret > 0) {
12185                 ret = -ENOENT;
12186                 goto out;
12187         }
12188
12189         while (1) {
12190                 node = path.nodes[0];
12191                 slot = path.slots[0];
12192                 btrfs_item_key_to_cpu(node, &key, slot);
12193                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12194                         goto out;
12195                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12196                         goto next;
12197                 if (!is_fstree(key.objectid))
12198                         goto next;
12199                 key.offset = (u64)-1;
12200
12201                 cur_root = btrfs_read_fs_root(fs_info, &key);
12202                 if (IS_ERR(cur_root) || !cur_root) {
12203                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12204                                 key.objectid);
12205                         goto out;
12206                 }
12207                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12208                                 cur_root);
12209                 if (ret < 0)
12210                         goto out;
12211 next:
12212                 ret = btrfs_next_item(tree_root, &path);
12213                 if (ret > 0) {
12214                         ret = 0;
12215                         goto out;
12216                 }
12217                 if (ret < 0)
12218                         goto out;
12219         }
12220
12221 out:
12222         btrfs_release_path(&path);
12223         return ret;
12224 }
12225
12226 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12227                                       struct btrfs_root *csum_root)
12228 {
12229         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12230         struct btrfs_path path;
12231         struct btrfs_extent_item *ei;
12232         struct extent_buffer *leaf;
12233         char *buf;
12234         struct btrfs_key key;
12235         int ret;
12236
12237         btrfs_init_path(&path);
12238         key.objectid = 0;
12239         key.type = BTRFS_EXTENT_ITEM_KEY;
12240         key.offset = 0;
12241         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12242         if (ret < 0) {
12243                 btrfs_release_path(&path);
12244                 return ret;
12245         }
12246
12247         buf = malloc(csum_root->fs_info->sectorsize);
12248         if (!buf) {
12249                 btrfs_release_path(&path);
12250                 return -ENOMEM;
12251         }
12252
12253         while (1) {
12254                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12255                         ret = btrfs_next_leaf(extent_root, &path);
12256                         if (ret < 0)
12257                                 break;
12258                         if (ret) {
12259                                 ret = 0;
12260                                 break;
12261                         }
12262                 }
12263                 leaf = path.nodes[0];
12264
12265                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12266                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12267                         path.slots[0]++;
12268                         continue;
12269                 }
12270
12271                 ei = btrfs_item_ptr(leaf, path.slots[0],
12272                                     struct btrfs_extent_item);
12273                 if (!(btrfs_extent_flags(leaf, ei) &
12274                       BTRFS_EXTENT_FLAG_DATA)) {
12275                         path.slots[0]++;
12276                         continue;
12277                 }
12278
12279                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12280                                     key.offset);
12281                 if (ret)
12282                         break;
12283                 path.slots[0]++;
12284         }
12285
12286         btrfs_release_path(&path);
12287         free(buf);
12288         return ret;
12289 }
12290
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree can't be relied on and the fs/subvolume trees have to be used
 * instead.  A non-zero @search_fs_tree selects the fs/subvolume trees as the
 * source, zero selects the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12307
12308 static void free_roots_info_cache(void)
12309 {
12310         if (!roots_info_cache)
12311                 return;
12312
12313         while (!cache_tree_empty(roots_info_cache)) {
12314                 struct cache_extent *entry;
12315                 struct root_item_info *rii;
12316
12317                 entry = first_cache_extent(roots_info_cache);
12318                 if (!entry)
12319                         break;
12320                 remove_cache_extent(roots_info_cache, entry);
12321                 rii = container_of(entry, struct root_item_info, cache_extent);
12322                 free(rii);
12323         }
12324
12325         free(roots_info_cache);
12326         roots_info_cache = NULL;
12327 }
12328
12329 static int build_roots_info_cache(struct btrfs_fs_info *info)
12330 {
12331         int ret = 0;
12332         struct btrfs_key key;
12333         struct extent_buffer *leaf;
12334         struct btrfs_path path;
12335
12336         if (!roots_info_cache) {
12337                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12338                 if (!roots_info_cache)
12339                         return -ENOMEM;
12340                 cache_tree_init(roots_info_cache);
12341         }
12342
12343         btrfs_init_path(&path);
12344         key.objectid = 0;
12345         key.type = BTRFS_EXTENT_ITEM_KEY;
12346         key.offset = 0;
12347         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12348         if (ret < 0)
12349                 goto out;
12350         leaf = path.nodes[0];
12351
12352         while (1) {
12353                 struct btrfs_key found_key;
12354                 struct btrfs_extent_item *ei;
12355                 struct btrfs_extent_inline_ref *iref;
12356                 int slot = path.slots[0];
12357                 int type;
12358                 u64 flags;
12359                 u64 root_id;
12360                 u8 level;
12361                 struct cache_extent *entry;
12362                 struct root_item_info *rii;
12363
12364                 if (slot >= btrfs_header_nritems(leaf)) {
12365                         ret = btrfs_next_leaf(info->extent_root, &path);
12366                         if (ret < 0) {
12367                                 break;
12368                         } else if (ret) {
12369                                 ret = 0;
12370                                 break;
12371                         }
12372                         leaf = path.nodes[0];
12373                         slot = path.slots[0];
12374                 }
12375
12376                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12377
12378                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12379                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12380                         goto next;
12381
12382                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12383                 flags = btrfs_extent_flags(leaf, ei);
12384
12385                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12386                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12387                         goto next;
12388
12389                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12390                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12391                         level = found_key.offset;
12392                 } else {
12393                         struct btrfs_tree_block_info *binfo;
12394
12395                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12396                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12397                         level = btrfs_tree_block_level(leaf, binfo);
12398                 }
12399
12400                 /*
12401                  * For a root extent, it must be of the following type and the
12402                  * first (and only one) iref in the item.
12403                  */
12404                 type = btrfs_extent_inline_ref_type(leaf, iref);
12405                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12406                         goto next;
12407
12408                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12409                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12410                 if (!entry) {
12411                         rii = malloc(sizeof(struct root_item_info));
12412                         if (!rii) {
12413                                 ret = -ENOMEM;
12414                                 goto out;
12415                         }
12416                         rii->cache_extent.start = root_id;
12417                         rii->cache_extent.size = 1;
12418                         rii->level = (u8)-1;
12419                         entry = &rii->cache_extent;
12420                         ret = insert_cache_extent(roots_info_cache, entry);
12421                         ASSERT(ret == 0);
12422                 } else {
12423                         rii = container_of(entry, struct root_item_info,
12424                                            cache_extent);
12425                 }
12426
12427                 ASSERT(rii->cache_extent.start == root_id);
12428                 ASSERT(rii->cache_extent.size == 1);
12429
12430                 if (level > rii->level || rii->level == (u8)-1) {
12431                         rii->level = level;
12432                         rii->bytenr = found_key.objectid;
12433                         rii->gen = btrfs_extent_generation(leaf, ei);
12434                         rii->node_count = 1;
12435                 } else if (level == rii->level) {
12436                         rii->node_count++;
12437                 }
12438 next:
12439                 path.slots[0]++;
12440         }
12441
12442 out:
12443         btrfs_release_path(&path);
12444
12445         return ret;
12446 }
12447
12448 static int maybe_repair_root_item(struct btrfs_path *path,
12449                                   const struct btrfs_key *root_key,
12450                                   const int read_only_mode)
12451 {
12452         const u64 root_id = root_key->objectid;
12453         struct cache_extent *entry;
12454         struct root_item_info *rii;
12455         struct btrfs_root_item ri;
12456         unsigned long offset;
12457
12458         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12459         if (!entry) {
12460                 fprintf(stderr,
12461                         "Error: could not find extent items for root %llu\n",
12462                         root_key->objectid);
12463                 return -ENOENT;
12464         }
12465
12466         rii = container_of(entry, struct root_item_info, cache_extent);
12467         ASSERT(rii->cache_extent.start == root_id);
12468         ASSERT(rii->cache_extent.size == 1);
12469
12470         if (rii->node_count != 1) {
12471                 fprintf(stderr,
12472                         "Error: could not find btree root extent for root %llu\n",
12473                         root_id);
12474                 return -ENOENT;
12475         }
12476
12477         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12478         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12479
12480         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12481             btrfs_root_level(&ri) != rii->level ||
12482             btrfs_root_generation(&ri) != rii->gen) {
12483
12484                 /*
12485                  * If we're in repair mode but our caller told us to not update
12486                  * the root item, i.e. just check if it needs to be updated, don't
12487                  * print this message, since the caller will call us again shortly
12488                  * for the same root item without read only mode (the caller will
12489                  * open a transaction first).
12490                  */
12491                 if (!(read_only_mode && repair))
12492                         fprintf(stderr,
12493                                 "%sroot item for root %llu,"
12494                                 " current bytenr %llu, current gen %llu, current level %u,"
12495                                 " new bytenr %llu, new gen %llu, new level %u\n",
12496                                 (read_only_mode ? "" : "fixing "),
12497                                 root_id,
12498                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12499                                 btrfs_root_level(&ri),
12500                                 rii->bytenr, rii->gen, rii->level);
12501
12502                 if (btrfs_root_generation(&ri) > rii->gen) {
12503                         fprintf(stderr,
12504                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12505                                 root_id, btrfs_root_generation(&ri), rii->gen);
12506                         return -EINVAL;
12507                 }
12508
12509                 if (!read_only_mode) {
12510                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12511                         btrfs_set_root_level(&ri, rii->level);
12512                         btrfs_set_root_generation(&ri, rii->gen);
12513                         write_extent_buffer(path->nodes[0], &ri,
12514                                             offset, sizeof(ri));
12515                 }
12516
12517                 return 1;
12518         }
12519
12520         return 0;
12521 }
12522
12523 /*
12524  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12525  * caused read-only snapshots to be corrupted if they were created at a moment
12526  * when the source subvolume/snapshot had orphan items. The issue was that the
12527  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12528  * node instead of the post orphan cleanup root node.
12529  * So this function, and its callees, just detects and fixes those cases. Even
12530  * though the regression was for read-only snapshots, this function applies to
12531  * any snapshot/subvolume root.
12532  * This must be run before any other repair code - not doing it so, makes other
12533  * repair code delete or modify backrefs in the extent tree for example, which
12534  * will result in an inconsistent fs after repairing the root items.
12535  */
12536 static int repair_root_items(struct btrfs_fs_info *info)
12537 {
12538         struct btrfs_path path;
12539         struct btrfs_key key;
12540         struct extent_buffer *leaf;
12541         struct btrfs_trans_handle *trans = NULL;
12542         int ret = 0;
12543         int bad_roots = 0;
12544         int need_trans = 0;
12545
12546         btrfs_init_path(&path);
12547
12548         ret = build_roots_info_cache(info);
12549         if (ret)
12550                 goto out;
12551
12552         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12553         key.type = BTRFS_ROOT_ITEM_KEY;
12554         key.offset = 0;
12555
12556 again:
12557         /*
12558          * Avoid opening and committing transactions if a leaf doesn't have
12559          * any root items that need to be fixed, so that we avoid rotating
12560          * backup roots unnecessarily.
12561          */
12562         if (need_trans) {
12563                 trans = btrfs_start_transaction(info->tree_root, 1);
12564                 if (IS_ERR(trans)) {
12565                         ret = PTR_ERR(trans);
12566                         goto out;
12567                 }
12568         }
12569
12570         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12571                                 0, trans ? 1 : 0);
12572         if (ret < 0)
12573                 goto out;
12574         leaf = path.nodes[0];
12575
12576         while (1) {
12577                 struct btrfs_key found_key;
12578
12579                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12580                         int no_more_keys = find_next_key(&path, &key);
12581
12582                         btrfs_release_path(&path);
12583                         if (trans) {
12584                                 ret = btrfs_commit_transaction(trans,
12585                                                                info->tree_root);
12586                                 trans = NULL;
12587                                 if (ret < 0)
12588                                         goto out;
12589                         }
12590                         need_trans = 0;
12591                         if (no_more_keys)
12592                                 break;
12593                         goto again;
12594                 }
12595
12596                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12597
12598                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12599                         goto next;
12600                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12601                         goto next;
12602
12603                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12604                 if (ret < 0)
12605                         goto out;
12606                 if (ret) {
12607                         if (!trans && repair) {
12608                                 need_trans = 1;
12609                                 key = found_key;
12610                                 btrfs_release_path(&path);
12611                                 goto again;
12612                         }
12613                         bad_roots++;
12614                 }
12615 next:
12616                 path.slots[0]++;
12617         }
12618         ret = 0;
12619 out:
12620         free_roots_info_cache();
12621         btrfs_release_path(&path);
12622         if (trans)
12623                 btrfs_commit_transaction(trans, info->tree_root);
12624         if (ret < 0)
12625                 return ret;
12626
12627         return bad_roots;
12628 }
12629
12630 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12631 {
12632         struct btrfs_trans_handle *trans;
12633         struct btrfs_block_group_cache *bg_cache;
12634         u64 current = 0;
12635         int ret = 0;
12636
12637         /* Clear all free space cache inodes and its extent data */
12638         while (1) {
12639                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12640                 if (!bg_cache)
12641                         break;
12642                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12643                 if (ret < 0)
12644                         return ret;
12645                 current = bg_cache->key.objectid + bg_cache->key.offset;
12646         }
12647
12648         /* Don't forget to set cache_generation to -1 */
12649         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12650         if (IS_ERR(trans)) {
12651                 error("failed to update super block cache generation");
12652                 return PTR_ERR(trans);
12653         }
12654         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12655         btrfs_commit_transaction(trans, fs_info->tree_root);
12656
12657         return ret;
12658 }
12659
/*
 * Usage text of "btrfs check": the synopsis, a one-line summary, a longer
 * description, an empty separator string and then one or more strings per
 * option.  The array is NULL terminated.
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and short summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* Long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	NULL
};
12690
12691 int cmd_check(int argc, char **argv)
12692 {
12693         struct cache_tree root_cache;
12694         struct btrfs_root *root;
12695         struct btrfs_fs_info *info;
12696         u64 bytenr = 0;
12697         u64 subvolid = 0;
12698         u64 tree_root_bytenr = 0;
12699         u64 chunk_root_bytenr = 0;
12700         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12701         int ret;
12702         int err = 0;
12703         u64 num;
12704         int init_csum_tree = 0;
12705         int readonly = 0;
12706         int clear_space_cache = 0;
12707         int qgroup_report = 0;
12708         int qgroups_repaired = 0;
12709         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12710
12711         while(1) {
12712                 int c;
12713                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12714                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12715                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12716                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12717                 static const struct option long_options[] = {
12718                         { "super", required_argument, NULL, 's' },
12719                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12720                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12721                         { "init-csum-tree", no_argument, NULL,
12722                                 GETOPT_VAL_INIT_CSUM },
12723                         { "init-extent-tree", no_argument, NULL,
12724                                 GETOPT_VAL_INIT_EXTENT },
12725                         { "check-data-csum", no_argument, NULL,
12726                                 GETOPT_VAL_CHECK_CSUM },
12727                         { "backup", no_argument, NULL, 'b' },
12728                         { "subvol-extents", required_argument, NULL, 'E' },
12729                         { "qgroup-report", no_argument, NULL, 'Q' },
12730                         { "tree-root", required_argument, NULL, 'r' },
12731                         { "chunk-root", required_argument, NULL,
12732                                 GETOPT_VAL_CHUNK_TREE },
12733                         { "progress", no_argument, NULL, 'p' },
12734                         { "mode", required_argument, NULL,
12735                                 GETOPT_VAL_MODE },
12736                         { "clear-space-cache", required_argument, NULL,
12737                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12738                         { NULL, 0, NULL, 0}
12739                 };
12740
12741                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12742                 if (c < 0)
12743                         break;
12744                 switch(c) {
12745                         case 'a': /* ignored */ break;
12746                         case 'b':
12747                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12748                                 break;
12749                         case 's':
12750                                 num = arg_strtou64(optarg);
12751                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12752                                         error(
12753                                         "super mirror should be less than %d",
12754                                                 BTRFS_SUPER_MIRROR_MAX);
12755                                         exit(1);
12756                                 }
12757                                 bytenr = btrfs_sb_offset(((int)num));
12758                                 printf("using SB copy %llu, bytenr %llu\n", num,
12759                                        (unsigned long long)bytenr);
12760                                 break;
12761                         case 'Q':
12762                                 qgroup_report = 1;
12763                                 break;
12764                         case 'E':
12765                                 subvolid = arg_strtou64(optarg);
12766                                 break;
12767                         case 'r':
12768                                 tree_root_bytenr = arg_strtou64(optarg);
12769                                 break;
12770                         case GETOPT_VAL_CHUNK_TREE:
12771                                 chunk_root_bytenr = arg_strtou64(optarg);
12772                                 break;
12773                         case 'p':
12774                                 ctx.progress_enabled = true;
12775                                 break;
12776                         case '?':
12777                         case 'h':
12778                                 usage(cmd_check_usage);
12779                         case GETOPT_VAL_REPAIR:
12780                                 printf("enabling repair mode\n");
12781                                 repair = 1;
12782                                 ctree_flags |= OPEN_CTREE_WRITES;
12783                                 break;
12784                         case GETOPT_VAL_READONLY:
12785                                 readonly = 1;
12786                                 break;
12787                         case GETOPT_VAL_INIT_CSUM:
12788                                 printf("Creating a new CRC tree\n");
12789                                 init_csum_tree = 1;
12790                                 repair = 1;
12791                                 ctree_flags |= OPEN_CTREE_WRITES;
12792                                 break;
12793                         case GETOPT_VAL_INIT_EXTENT:
12794                                 init_extent_tree = 1;
12795                                 ctree_flags |= (OPEN_CTREE_WRITES |
12796                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12797                                 repair = 1;
12798                                 break;
12799                         case GETOPT_VAL_CHECK_CSUM:
12800                                 check_data_csum = 1;
12801                                 break;
12802                         case GETOPT_VAL_MODE:
12803                                 check_mode = parse_check_mode(optarg);
12804                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12805                                         error("unknown mode: %s", optarg);
12806                                         exit(1);
12807                                 }
12808                                 break;
12809                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12810                                 if (strcmp(optarg, "v1") == 0) {
12811                                         clear_space_cache = 1;
12812                                 } else if (strcmp(optarg, "v2") == 0) {
12813                                         clear_space_cache = 2;
12814                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12815                                 } else {
12816                                         error(
12817                 "invalid argument to --clear-space-cache, must be v1 or v2");
12818                                         exit(1);
12819                                 }
12820                                 ctree_flags |= OPEN_CTREE_WRITES;
12821                                 break;
12822                 }
12823         }
12824
12825         if (check_argc_exact(argc - optind, 1))
12826                 usage(cmd_check_usage);
12827
12828         if (ctx.progress_enabled) {
12829                 ctx.tp = TASK_NOTHING;
12830                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12831         }
12832
12833         /* This check is the only reason for --readonly to exist */
12834         if (readonly && repair) {
12835                 error("repair options are not compatible with --readonly");
12836                 exit(1);
12837         }
12838
12839         /*
12840          * Not supported yet
12841          */
12842         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12843                 error("low memory mode doesn't support repair yet");
12844                 exit(1);
12845         }
12846
12847         radix_tree_init();
12848         cache_tree_init(&root_cache);
12849
12850         if((ret = check_mounted(argv[optind])) < 0) {
12851                 error("could not check mount status: %s", strerror(-ret));
12852                 err |= !!ret;
12853                 goto err_out;
12854         } else if(ret) {
12855                 error("%s is currently mounted, aborting", argv[optind]);
12856                 ret = -EBUSY;
12857                 err |= !!ret;
12858                 goto err_out;
12859         }
12860
12861         /* only allow partial opening under repair mode */
12862         if (repair)
12863                 ctree_flags |= OPEN_CTREE_PARTIAL;
12864
12865         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12866                                   chunk_root_bytenr, ctree_flags);
12867         if (!info) {
12868                 error("cannot open file system");
12869                 ret = -EIO;
12870                 err |= !!ret;
12871                 goto err_out;
12872         }
12873
12874         global_info = info;
12875         root = info->fs_root;
12876         if (clear_space_cache == 1) {
12877                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12878                         error(
12879                 "free space cache v2 detected, use --clear-space-cache v2");
12880                         ret = 1;
12881                         goto close_out;
12882                 }
12883                 printf("Clearing free space cache\n");
12884                 ret = clear_free_space_cache(info);
12885                 if (ret) {
12886                         error("failed to clear free space cache");
12887                         ret = 1;
12888                 } else {
12889                         printf("Free space cache cleared\n");
12890                 }
12891                 goto close_out;
12892         } else if (clear_space_cache == 2) {
12893                 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12894                         printf("no free space cache v2 to clear\n");
12895                         ret = 0;
12896                         goto close_out;
12897                 }
12898                 printf("Clear free space cache v2\n");
12899                 ret = btrfs_clear_free_space_tree(info);
12900                 if (ret) {
12901                         error("failed to clear free space cache v2: %d", ret);
12902                         ret = 1;
12903                 } else {
12904                         printf("free space cache v2 cleared\n");
12905                 }
12906                 goto close_out;
12907         }
12908
12909         /*
12910          * repair mode will force us to commit transaction which
12911          * will make us fail to load log tree when mounting.
12912          */
12913         if (repair && btrfs_super_log_root(info->super_copy)) {
12914                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12915                 if (!ret) {
12916                         ret = 1;
12917                         err |= !!ret;
12918                         goto close_out;
12919                 }
12920                 ret = zero_log_tree(root);
12921                 err |= !!ret;
12922                 if (ret) {
12923                         error("failed to zero log tree: %d", ret);
12924                         goto close_out;
12925                 }
12926         }
12927
12928         uuid_unparse(info->super_copy->fsid, uuidbuf);
12929         if (qgroup_report) {
12930                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12931                        uuidbuf);
12932                 ret = qgroup_verify_all(info);
12933                 err |= !!ret;
12934                 if (ret == 0)
12935                         report_qgroups(1);
12936                 goto close_out;
12937         }
12938         if (subvolid) {
12939                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12940                        subvolid, argv[optind], uuidbuf);
12941                 ret = print_extent_state(info, subvolid);
12942                 err |= !!ret;
12943                 goto close_out;
12944         }
12945         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12946
12947         if (!extent_buffer_uptodate(info->tree_root->node) ||
12948             !extent_buffer_uptodate(info->dev_root->node) ||
12949             !extent_buffer_uptodate(info->chunk_root->node)) {
12950                 error("critical roots corrupted, unable to check the filesystem");
12951                 err |= !!ret;
12952                 ret = -EIO;
12953                 goto close_out;
12954         }
12955
12956         if (init_extent_tree || init_csum_tree) {
12957                 struct btrfs_trans_handle *trans;
12958
12959                 trans = btrfs_start_transaction(info->extent_root, 0);
12960                 if (IS_ERR(trans)) {
12961                         error("error starting transaction");
12962                         ret = PTR_ERR(trans);
12963                         err |= !!ret;
12964                         goto close_out;
12965                 }
12966
12967                 if (init_extent_tree) {
12968                         printf("Creating a new extent tree\n");
12969                         ret = reinit_extent_tree(trans, info);
12970                         err |= !!ret;
12971                         if (ret)
12972                                 goto close_out;
12973                 }
12974
12975                 if (init_csum_tree) {
12976                         printf("Reinitialize checksum tree\n");
12977                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12978                         if (ret) {
12979                                 error("checksum tree initialization failed: %d",
12980                                                 ret);
12981                                 ret = -EIO;
12982                                 err |= !!ret;
12983                                 goto close_out;
12984                         }
12985
12986                         ret = fill_csum_tree(trans, info->csum_root,
12987                                              init_extent_tree);
12988                         err |= !!ret;
12989                         if (ret) {
12990                                 error("checksum tree refilling failed: %d", ret);
12991                                 return -EIO;
12992                         }
12993                 }
12994                 /*
12995                  * Ok now we commit and run the normal fsck, which will add
12996                  * extent entries for all of the items it finds.
12997                  */
12998                 ret = btrfs_commit_transaction(trans, info->extent_root);
12999                 err |= !!ret;
13000                 if (ret)
13001                         goto close_out;
13002         }
13003         if (!extent_buffer_uptodate(info->extent_root->node)) {
13004                 error("critical: extent_root, unable to check the filesystem");
13005                 ret = -EIO;
13006                 err |= !!ret;
13007                 goto close_out;
13008         }
13009         if (!extent_buffer_uptodate(info->csum_root->node)) {
13010                 error("critical: csum_root, unable to check the filesystem");
13011                 ret = -EIO;
13012                 err |= !!ret;
13013                 goto close_out;
13014         }
13015
13016         if (!ctx.progress_enabled)
13017                 fprintf(stderr, "checking extents\n");
13018         if (check_mode == CHECK_MODE_LOWMEM)
13019                 ret = check_chunks_and_extents_v2(root);
13020         else
13021                 ret = check_chunks_and_extents(root);
13022         err |= !!ret;
13023         if (ret)
13024                 error(
13025                 "errors found in extent allocation tree or chunk allocation");
13026
13027         ret = repair_root_items(info);
13028         err |= !!ret;
13029         if (ret < 0) {
13030                 error("failed to repair root items: %s", strerror(-ret));
13031                 goto close_out;
13032         }
13033         if (repair) {
13034                 fprintf(stderr, "Fixed %d roots.\n", ret);
13035                 ret = 0;
13036         } else if (ret > 0) {
13037                 fprintf(stderr,
13038                        "Found %d roots with an outdated root item.\n",
13039                        ret);
13040                 fprintf(stderr,
13041                         "Please run a filesystem check with the option --repair to fix them.\n");
13042                 ret = 1;
13043                 err |= !!ret;
13044                 goto close_out;
13045         }
13046
13047         if (!ctx.progress_enabled) {
13048                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13049                         fprintf(stderr, "checking free space tree\n");
13050                 else
13051                         fprintf(stderr, "checking free space cache\n");
13052         }
13053         ret = check_space_cache(root);
13054         err |= !!ret;
13055         if (ret) {
13056                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13057                         error("errors found in free space tree");
13058                 else
13059                         error("errors found in free space cache");
13060                 goto out;
13061         }
13062
13063         /*
13064          * We used to have to have these hole extents in between our real
13065          * extents so if we don't have this flag set we need to make sure there
13066          * are no gaps in the file extents for inodes, otherwise we can just
13067          * ignore it when this happens.
13068          */
13069         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13070         if (!ctx.progress_enabled)
13071                 fprintf(stderr, "checking fs roots\n");
13072         if (check_mode == CHECK_MODE_LOWMEM)
13073                 ret = check_fs_roots_v2(root->fs_info);
13074         else
13075                 ret = check_fs_roots(root, &root_cache);
13076         err |= !!ret;
13077         if (ret) {
13078                 error("errors found in fs roots");
13079                 goto out;
13080         }
13081
13082         fprintf(stderr, "checking csums\n");
13083         ret = check_csums(root);
13084         err |= !!ret;
13085         if (ret) {
13086                 error("errors found in csum tree");
13087                 goto out;
13088         }
13089
13090         fprintf(stderr, "checking root refs\n");
13091         /* For low memory mode, check_fs_roots_v2 handles root refs */
13092         if (check_mode != CHECK_MODE_LOWMEM) {
13093                 ret = check_root_refs(root, &root_cache);
13094                 err |= !!ret;
13095                 if (ret) {
13096                         error("errors found in root refs");
13097                         goto out;
13098                 }
13099         }
13100
13101         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13102                 struct extent_buffer *eb;
13103
13104                 eb = list_first_entry(&root->fs_info->recow_ebs,
13105                                       struct extent_buffer, recow);
13106                 list_del_init(&eb->recow);
13107                 ret = recow_extent_buffer(root, eb);
13108                 err |= !!ret;
13109                 if (ret) {
13110                         error("fails to fix transid errors");
13111                         break;
13112                 }
13113         }
13114
13115         while (!list_empty(&delete_items)) {
13116                 struct bad_item *bad;
13117
13118                 bad = list_first_entry(&delete_items, struct bad_item, list);
13119                 list_del_init(&bad->list);
13120                 if (repair) {
13121                         ret = delete_bad_item(root, bad);
13122                         err |= !!ret;
13123                 }
13124                 free(bad);
13125         }
13126
13127         if (info->quota_enabled) {
13128                 fprintf(stderr, "checking quota groups\n");
13129                 ret = qgroup_verify_all(info);
13130                 err |= !!ret;
13131                 if (ret) {
13132                         error("failed to check quota groups");
13133                         goto out;
13134                 }
13135                 report_qgroups(0);
13136                 ret = repair_qgroups(info, &qgroups_repaired);
13137                 err |= !!ret;
13138                 if (err) {
13139                         error("failed to repair quota groups");
13140                         goto out;
13141                 }
13142                 ret = 0;
13143         }
13144
13145         if (!list_empty(&root->fs_info->recow_ebs)) {
13146                 error("transid errors in file system");
13147                 ret = 1;
13148                 err |= !!ret;
13149         }
13150 out:
13151         if (found_old_backref) { /*
13152                  * there was a disk format change when mixed
13153                  * backref was in testing tree. The old format
13154                  * existed about one week.
13155                  */
13156                 printf("\n * Found old mixed backref format. "
13157                        "The old format is not supported! *"
13158                        "\n * Please mount the FS in readonly mode, "
13159                        "backup data and re-format the FS. *\n\n");
13160                 err |= 1;
13161         }
13162         printf("found %llu bytes used, ",
13163                (unsigned long long)bytes_used);
13164         if (err)
13165                 printf("error(s) found\n");
13166         else
13167                 printf("no error found\n");
13168         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13169         printf("total tree bytes: %llu\n",
13170                (unsigned long long)total_btree_bytes);
13171         printf("total fs tree bytes: %llu\n",
13172                (unsigned long long)total_fs_tree_bytes);
13173         printf("total extent tree bytes: %llu\n",
13174                (unsigned long long)total_extent_tree_bytes);
13175         printf("btree space waste bytes: %llu\n",
13176                (unsigned long long)btree_space_waste);
13177         printf("file data blocks allocated: %llu\n referenced %llu\n",
13178                 (unsigned long long)data_bytes_allocated,
13179                 (unsigned long long)data_bytes_referenced);
13180
13181         free_qgroup_counts();
13182         free_root_recs_tree(&root_cache);
13183 close_out:
13184         close_ctree(root);
13185 err_out:
13186         if (ctx.progress_enabled)
13187                 task_deinit(ctx.info);
13188
13189         return err;
13190 }