btrfs-progs: tests: Remove misleading BCP 78 boilerplate from SHA implementation
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases reported by the progress indicator.  The first three values are
 * used as indexes into task_position_string[] in print_status_check(), so
 * the two must stay in the same order.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* State handed to the progress reporting callbacks (print_status_check()). */
struct task_ctx {
        int progress_enabled;
        /* Phase whose label print_status_check() prints */
        enum task_position tp;

        /* Handle passed to task_period_start() / task_period_wait() */
        struct task_info *info;
};
60
/* File-local counters, work lists and settings. */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress reporting state, see struct task_ctx */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selector.  CHECK_MODE_UNKNOWN is what
 * parse_check_mode() returns for a string it does not recognize;
 * CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode, starts out as the default one */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  It is embedded as the 'node' member
 * of struct data_backref and struct tree_backref and linked through 'list'.
 */
struct extent_backref {
        struct list_head list;
        /* NOTE(review): presumably tells data_backref from tree_backref - confirm */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
/*
 * Back reference of a data extent.  'node' is the embedded common header;
 * to_data_backref() converts a pointer to it back to this structure.
 */
struct data_backref {
        struct extent_backref node;
        /* parent and root share the same storage */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /*
         * NOTE(review): separate from the 1-bit node.found_ref flag; looks
         * like a counter - confirm against the users.
         */
        u32 found_ref;
};
115
/*
 * Error bits.  Every value occupies its own bit, so several of them can be
 * OR-ed into one mask.  Bit 0 is not assigned by this list.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
/*
 * Much like data_backref, but without the undetermined members and linked
 * through a plain list_head.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
153
/*
 * Back reference of a tree block.  'node' is the embedded common header;
 * to_tree_backref() converts a pointer to it back to this structure.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share the same storage */
        union {
                u64 parent;
                u64 root;
        };
};
161
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
163 {
164         return container_of(back, struct tree_backref, node);
165 }
166
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit field, so it can hold this value in addition to
 * 0 and 1.
 */
enum { FLAG_UNSET = 2 };
169
/* Per-extent bookkeeping record. */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* Link that to_extent_record() converts back to the record */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so that it can also hold FLAG_UNSET */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
196
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
198 {
199         return container_of(entry, struct extent_record, list);
200 }
201
202 struct inode_backref {
203         struct list_head list;
204         unsigned int found_dir_item:1;
205         unsigned int found_dir_index:1;
206         unsigned int found_inode_ref:1;
207         u8 filetype;
208         u8 ref_type;
209         int errors;
210         u64 dir;
211         u64 index;
212         u16 namelen;
213         char name[0];
214 };
215
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
217 {
218         return list_entry(entry, struct inode_backref, list);
219 }
220
/*
 * Selected fields describing one root item.
 * NOTE(review): the field names suggest a copy of the on-disk root item
 * values - confirm against the code that fills this in.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
230
/* Back reference error bits, decoded into text by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
244
/*
 * One hole, covering [start, start + len).  Holes are kept in an rb-tree,
 * see add_file_extent_hole() and del_file_extent_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
250
/* Everything collected about one inode. */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        /* get_inode_rec() starts new records with (u64)-1 here */
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        /*
         * Reference count: get_inode_rec() clones the record before
         * handing it out for modification while this is above 1, and
         * free_inode_rec() releases the record when it drops to 0.
         */
        u32 refs;
};
278
/* Bits of inode_record::errors, decoded into text by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
294
295 struct root_backref {
296         struct list_head list;
297         unsigned int found_dir_item:1;
298         unsigned int found_dir_index:1;
299         unsigned int found_back_ref:1;
300         unsigned int found_forward_ref:1;
301         unsigned int reachable:1;
302         int errors;
303         u64 ref_root;
304         u64 dir;
305         u64 index;
306         u16 namelen;
307         char name[0];
308 };
309
310 static inline struct root_backref* to_root_backref(struct list_head *entry)
311 {
312         return list_entry(entry, struct root_backref, list);
313 }
314
/* Everything collected about one root. */
struct root_record {
        /* NOTE(review): presumably a list of struct root_backref - confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
322
/*
 * Cache tree entry carrying an opaque pointer.  get_inode_rec() uses it to
 * store a struct inode_record keyed by inode number.
 */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
327
/*
 * Cache tree entry with its own root and inode caches.
 * NOTE(review): struct walk_control keeps one pointer to this per tree
 * level; the exact sharing rules are not visible here - confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
335
/* Start and size of one block. */
struct block_info {
        u64 start;
        u32 size;
};
340
/* Tree walk state. */
struct walk_control {
        struct cache_tree shared;
        /* One slot per possible tree level */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* NOTE(review): presumably an index into nodes[] - confirm */
        int active_node;
        int root_level;
};
347
/* Key of an item together with the id of the root it was found in. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
353
/*
 * A (bytenr, bytes) pair with counters, linked through 'list'.
 * NOTE(review): the meaning of 'count' and 'broken' depends on the users,
 * which are not visible here - confirm.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
361
/* Summary of one root, stored in a cache tree through 'cache_extent'. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
371
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every error has its own bit so that a combined mask stays unambiguous.
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH,
 * which made the two errors indistinguishable; it now uses the first free
 * bit after CHUNK_TYPE_MISMATCH.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
388
389 static void *print_status_check(void *p)
390 {
391         struct task_ctx *priv = p;
392         const char work_indicator[] = { '.', 'o', 'O', 'o' };
393         uint32_t count = 0;
394         static char *task_position_string[] = {
395                 "checking extents",
396                 "checking free space cache",
397                 "checking fs roots",
398         };
399
400         task_period_start(priv->info, 1000 /* 1s */);
401
402         if (priv->tp == TASK_NOTHING)
403                 return NULL;
404
405         while (1) {
406                 printf("%s [%c]\r", task_position_string[priv->tp],
407                                 work_indicator[count % 4]);
408                 count++;
409                 fflush(stdout);
410                 task_period_wait(priv->info);
411         }
412         return NULL;
413 }
414
/*
 * Terminate the progress line: write a newline to stdout and flush it.
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
422
423 static enum btrfs_check_mode parse_check_mode(const char *str)
424 {
425         if (strcmp(str, "lowmem") == 0)
426                 return CHECK_MODE_LOWMEM;
427         if (strcmp(str, "orig") == 0)
428                 return CHECK_MODE_ORIGINAL;
429         if (strcmp(str, "original") == 0)
430                 return CHECK_MODE_ORIGINAL;
431
432         return CHECK_MODE_UNKNOWN;
433 }
434
435 /* Compatible function to allow reuse of old codes */
436 static u64 first_extent_gap(struct rb_root *holes)
437 {
438         struct file_extent_hole *hole;
439
440         if (RB_EMPTY_ROOT(holes))
441                 return (u64)-1;
442
443         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
444         return hole->start;
445 }
446
447 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
448 {
449         struct file_extent_hole *hole1;
450         struct file_extent_hole *hole2;
451
452         hole1 = rb_entry(node1, struct file_extent_hole, node);
453         hole2 = rb_entry(node2, struct file_extent_hole, node);
454
455         if (hole1->start > hole2->start)
456                 return -1;
457         if (hole1->start < hole2->start)
458                 return 1;
459         /* Now hole1->start == hole2->start */
460         if (hole1->len >= hole2->len)
461                 /*
462                  * Hole 1 will be merge center
463                  * Same hole will be merged later
464                  */
465                 return -1;
466         /* Hole 2 will be merge center */
467         return 1;
468 }
469
470 /*
471  * Add a hole to the record
472  *
473  * This will do hole merge for copy_file_extent_holes(),
474  * which will ensure there won't be continuous holes.
475  */
476 static int add_file_extent_hole(struct rb_root *holes,
477                                 u64 start, u64 len)
478 {
479         struct file_extent_hole *hole;
480         struct file_extent_hole *prev = NULL;
481         struct file_extent_hole *next = NULL;
482
483         hole = malloc(sizeof(*hole));
484         if (!hole)
485                 return -ENOMEM;
486         hole->start = start;
487         hole->len = len;
488         /* Since compare will not return 0, no -EEXIST will happen */
489         rb_insert(holes, &hole->node, compare_hole);
490
491         /* simple merge with previous hole */
492         if (rb_prev(&hole->node))
493                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
494                                 node);
495         if (prev && prev->start + prev->len >= hole->start) {
496                 hole->len = hole->start + hole->len - prev->start;
497                 hole->start = prev->start;
498                 rb_erase(&prev->node, holes);
499                 free(prev);
500                 prev = NULL;
501         }
502
503         /* iterate merge with next holes */
504         while (1) {
505                 if (!rb_next(&hole->node))
506                         break;
507                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
508                                         node);
509                 if (hole->start + hole->len >= next->start) {
510                         if (hole->start + hole->len <= next->start + next->len)
511                                 hole->len = next->start + next->len -
512                                             hole->start;
513                         rb_erase(&next->node, holes);
514                         free(next);
515                         next = NULL;
516                 } else
517                         break;
518         }
519         return 0;
520 }
521
522 static int compare_hole_range(struct rb_node *node, void *data)
523 {
524         struct file_extent_hole *hole;
525         u64 start;
526
527         hole = (struct file_extent_hole *)data;
528         start = hole->start;
529
530         hole = rb_entry(node, struct file_extent_hole, node);
531         if (start < hole->start)
532                 return -1;
533         if (start >= hole->start && start < hole->start + hole->len)
534                 return 0;
535         return 1;
536 }
537
538 /*
539  * Delete a hole in the record
540  *
541  * This will do the hole split and is much restrict than add.
542  */
543 static int del_file_extent_hole(struct rb_root *holes,
544                                 u64 start, u64 len)
545 {
546         struct file_extent_hole *hole;
547         struct file_extent_hole tmp;
548         u64 prev_start = 0;
549         u64 prev_len = 0;
550         u64 next_start = 0;
551         u64 next_len = 0;
552         struct rb_node *node;
553         int have_prev = 0;
554         int have_next = 0;
555         int ret = 0;
556
557         tmp.start = start;
558         tmp.len = len;
559         node = rb_search(holes, &tmp, compare_hole_range, NULL);
560         if (!node)
561                 return -EEXIST;
562         hole = rb_entry(node, struct file_extent_hole, node);
563         if (start + len > hole->start + hole->len)
564                 return -EEXIST;
565
566         /*
567          * Now there will be no overlap, delete the hole and re-add the
568          * split(s) if they exists.
569          */
570         if (start > hole->start) {
571                 prev_start = hole->start;
572                 prev_len = start - hole->start;
573                 have_prev = 1;
574         }
575         if (hole->start + hole->len > start + len) {
576                 next_start = start + len;
577                 next_len = hole->start + hole->len - start - len;
578                 have_next = 1;
579         }
580         rb_erase(node, holes);
581         free(hole);
582         if (have_prev) {
583                 ret = add_file_extent_hole(holes, prev_start, prev_len);
584                 if (ret < 0)
585                         return ret;
586         }
587         if (have_next) {
588                 ret = add_file_extent_hole(holes, next_start, next_len);
589                 if (ret < 0)
590                         return ret;
591         }
592         return 0;
593 }
594
595 static int copy_file_extent_holes(struct rb_root *dst,
596                                   struct rb_root *src)
597 {
598         struct file_extent_hole *hole;
599         struct rb_node *node;
600         int ret = 0;
601
602         node = rb_first(src);
603         while (node) {
604                 hole = rb_entry(node, struct file_extent_hole, node);
605                 ret = add_file_extent_hole(dst, hole->start, hole->len);
606                 if (ret)
607                         break;
608                 node = rb_next(node);
609         }
610         return ret;
611 }
612
613 static void free_file_extent_holes(struct rb_root *holes)
614 {
615         struct rb_node *node;
616         struct file_extent_hole *hole;
617
618         node = rb_first(holes);
619         while (node) {
620                 hole = rb_entry(node, struct file_extent_hole, node);
621                 rb_erase(node, holes);
622                 free(hole);
623                 node = rb_first(holes);
624         }
625 }
626
/* Forward declaration, the function is static and defined in this file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
628
629 static void record_root_in_trans(struct btrfs_trans_handle *trans,
630                                  struct btrfs_root *root)
631 {
632         if (root->last_trans != trans->transid) {
633                 root->track_dirty = 1;
634                 root->last_trans = trans->transid;
635                 root->commit_root = root->node;
636                 extent_buffer_get(root->node);
637         }
638 }
639
640 static u8 imode_to_type(u32 imode)
641 {
642 #define S_SHIFT 12
643         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
644                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
645                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
646                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
647                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
648                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
649                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
650                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
651         };
652
653         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
654 #undef S_SHIFT
655 }
656
657 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
658 {
659         struct device_record *rec1;
660         struct device_record *rec2;
661
662         rec1 = rb_entry(node1, struct device_record, node);
663         rec2 = rb_entry(node2, struct device_record, node);
664         if (rec1->devid > rec2->devid)
665                 return -1;
666         else if (rec1->devid < rec2->devid)
667                 return 1;
668         else
669                 return 0;
670 }
671
672 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
673 {
674         struct inode_record *rec;
675         struct inode_backref *backref;
676         struct inode_backref *orig;
677         struct inode_backref *tmp;
678         struct orphan_data_extent *src_orphan;
679         struct orphan_data_extent *dst_orphan;
680         struct rb_node *rb;
681         size_t size;
682         int ret;
683
684         rec = malloc(sizeof(*rec));
685         if (!rec)
686                 return ERR_PTR(-ENOMEM);
687         memcpy(rec, orig_rec, sizeof(*rec));
688         rec->refs = 1;
689         INIT_LIST_HEAD(&rec->backrefs);
690         INIT_LIST_HEAD(&rec->orphan_extents);
691         rec->holes = RB_ROOT;
692
693         list_for_each_entry(orig, &orig_rec->backrefs, list) {
694                 size = sizeof(*orig) + orig->namelen + 1;
695                 backref = malloc(size);
696                 if (!backref) {
697                         ret = -ENOMEM;
698                         goto cleanup;
699                 }
700                 memcpy(backref, orig, size);
701                 list_add_tail(&backref->list, &rec->backrefs);
702         }
703         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
704                 dst_orphan = malloc(sizeof(*dst_orphan));
705                 if (!dst_orphan) {
706                         ret = -ENOMEM;
707                         goto cleanup;
708                 }
709                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
710                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
711         }
712         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
713         if (ret < 0)
714                 goto cleanup_rb;
715
716         return rec;
717
718 cleanup_rb:
719         rb = rb_first(&rec->holes);
720         while (rb) {
721                 struct file_extent_hole *hole;
722
723                 hole = rb_entry(rb, struct file_extent_hole, node);
724                 rb = rb_next(rb);
725                 free(hole);
726         }
727
728 cleanup:
729         if (!list_empty(&rec->backrefs))
730                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
731                         list_del(&orig->list);
732                         free(orig);
733                 }
734
735         if (!list_empty(&rec->orphan_extents))
736                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
737                         list_del(&orig->list);
738                         free(orig);
739                 }
740
741         free(rec);
742
743         return ERR_PTR(ret);
744 }
745
746 static void print_orphan_data_extents(struct list_head *orphan_extents,
747                                       u64 objectid)
748 {
749         struct orphan_data_extent *orphan;
750
751         if (list_empty(orphan_extents))
752                 return;
753         printf("The following data extent is lost in tree %llu:\n",
754                objectid);
755         list_for_each_entry(orphan, orphan_extents, list) {
756                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
757                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
758                        orphan->disk_len);
759         }
760 }
761
762 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
763 {
764         u64 root_objectid = root->root_key.objectid;
765         int errors = rec->errors;
766
767         if (!errors)
768                 return;
769         /* reloc root errors, we print its corresponding fs root objectid*/
770         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
771                 root_objectid = root->root_key.offset;
772                 fprintf(stderr, "reloc");
773         }
774         fprintf(stderr, "root %llu inode %llu errors %x",
775                 (unsigned long long) root_objectid,
776                 (unsigned long long) rec->ino, rec->errors);
777
778         if (errors & I_ERR_NO_INODE_ITEM)
779                 fprintf(stderr, ", no inode item");
780         if (errors & I_ERR_NO_ORPHAN_ITEM)
781                 fprintf(stderr, ", no orphan item");
782         if (errors & I_ERR_DUP_INODE_ITEM)
783                 fprintf(stderr, ", dup inode item");
784         if (errors & I_ERR_DUP_DIR_INDEX)
785                 fprintf(stderr, ", dup dir index");
786         if (errors & I_ERR_ODD_DIR_ITEM)
787                 fprintf(stderr, ", odd dir item");
788         if (errors & I_ERR_ODD_FILE_EXTENT)
789                 fprintf(stderr, ", odd file extent");
790         if (errors & I_ERR_BAD_FILE_EXTENT)
791                 fprintf(stderr, ", bad file extent");
792         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
793                 fprintf(stderr, ", file extent overlap");
794         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
795                 fprintf(stderr, ", file extent discount");
796         if (errors & I_ERR_DIR_ISIZE_WRONG)
797                 fprintf(stderr, ", dir isize wrong");
798         if (errors & I_ERR_FILE_NBYTES_WRONG)
799                 fprintf(stderr, ", nbytes wrong");
800         if (errors & I_ERR_ODD_CSUM_ITEM)
801                 fprintf(stderr, ", odd csum item");
802         if (errors & I_ERR_SOME_CSUM_MISSING)
803                 fprintf(stderr, ", some csum missing");
804         if (errors & I_ERR_LINK_COUNT_WRONG)
805                 fprintf(stderr, ", link count wrong");
806         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
807                 fprintf(stderr, ", orphan file extent");
808         fprintf(stderr, "\n");
809         /* Print the orphan extents if needed */
810         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
811                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
812
813         /* Print the holes if needed */
814         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
815                 struct file_extent_hole *hole;
816                 struct rb_node *node;
817                 int found = 0;
818
819                 node = rb_first(&rec->holes);
820                 fprintf(stderr, "Found file extent holes:\n");
821                 while (node) {
822                         found = 1;
823                         hole = rb_entry(node, struct file_extent_hole, node);
824                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
825                                 hole->start, hole->len);
826                         node = rb_next(node);
827                 }
828                 if (!found)
829                         fprintf(stderr, "\tstart: 0, len: %llu\n",
830                                 round_up(rec->isize,
831                                          root->fs_info->sectorsize));
832         }
833 }
834
835 static void print_ref_error(int errors)
836 {
837         if (errors & REF_ERR_NO_DIR_ITEM)
838                 fprintf(stderr, ", no dir item");
839         if (errors & REF_ERR_NO_DIR_INDEX)
840                 fprintf(stderr, ", no dir index");
841         if (errors & REF_ERR_NO_INODE_REF)
842                 fprintf(stderr, ", no inode ref");
843         if (errors & REF_ERR_DUP_DIR_ITEM)
844                 fprintf(stderr, ", dup dir item");
845         if (errors & REF_ERR_DUP_DIR_INDEX)
846                 fprintf(stderr, ", dup dir index");
847         if (errors & REF_ERR_DUP_INODE_REF)
848                 fprintf(stderr, ", dup inode ref");
849         if (errors & REF_ERR_INDEX_UNMATCH)
850                 fprintf(stderr, ", index mismatch");
851         if (errors & REF_ERR_FILETYPE_UNMATCH)
852                 fprintf(stderr, ", filetype mismatch");
853         if (errors & REF_ERR_NAME_TOO_LONG)
854                 fprintf(stderr, ", name too long");
855         if (errors & REF_ERR_NO_ROOT_REF)
856                 fprintf(stderr, ", no root ref");
857         if (errors & REF_ERR_NO_ROOT_BACKREF)
858                 fprintf(stderr, ", no root backref");
859         if (errors & REF_ERR_DUP_ROOT_REF)
860                 fprintf(stderr, ", dup root ref");
861         if (errors & REF_ERR_DUP_ROOT_BACKREF)
862                 fprintf(stderr, ", dup root backref");
863         fprintf(stderr, "\n");
864 }
865
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
867                                           u64 ino, int mod)
868 {
869         struct ptr_node *node;
870         struct cache_extent *cache;
871         struct inode_record *rec = NULL;
872         int ret;
873
874         cache = lookup_cache_extent(inode_cache, ino, 1);
875         if (cache) {
876                 node = container_of(cache, struct ptr_node, cache);
877                 rec = node->data;
878                 if (mod && rec->refs > 1) {
879                         node->data = clone_inode_rec(rec);
880                         if (IS_ERR(node->data))
881                                 return node->data;
882                         rec->refs--;
883                         rec = node->data;
884                 }
885         } else if (mod) {
886                 rec = calloc(1, sizeof(*rec));
887                 if (!rec)
888                         return ERR_PTR(-ENOMEM);
889                 rec->ino = ino;
890                 rec->extent_start = (u64)-1;
891                 rec->refs = 1;
892                 INIT_LIST_HEAD(&rec->backrefs);
893                 INIT_LIST_HEAD(&rec->orphan_extents);
894                 rec->holes = RB_ROOT;
895
896                 node = malloc(sizeof(*node));
897                 if (!node) {
898                         free(rec);
899                         return ERR_PTR(-ENOMEM);
900                 }
901                 node->cache.start = ino;
902                 node->cache.size = 1;
903                 node->data = rec;
904
905                 if (ino == BTRFS_FREE_INO_OBJECTID)
906                         rec->found_link = 1;
907
908                 ret = insert_cache_extent(inode_cache, &node->cache);
909                 if (ret)
910                         return ERR_PTR(-EEXIST);
911         }
912         return rec;
913 }
914
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
916 {
917         struct orphan_data_extent *orphan;
918
919         while (!list_empty(orphan_extents)) {
920                 orphan = list_entry(orphan_extents->next,
921                                     struct orphan_data_extent, list);
922                 list_del(&orphan->list);
923                 free(orphan);
924         }
925 }
926
927 static void free_inode_rec(struct inode_record *rec)
928 {
929         struct inode_backref *backref;
930
931         if (--rec->refs > 0)
932                 return;
933
934         while (!list_empty(&rec->backrefs)) {
935                 backref = to_inode_backref(rec->backrefs.next);
936                 list_del(&backref->list);
937                 free(backref);
938         }
939         free_orphan_data_extents(&rec->orphan_extents);
940         free_file_extent_holes(&rec->holes);
941         free(rec);
942 }
943
944 static int can_free_inode_rec(struct inode_record *rec)
945 {
946         if (!rec->errors && rec->checked && rec->found_inode_item &&
947             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
948                 return 1;
949         return 0;
950 }
951
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953                                  struct inode_record *rec)
954 {
955         struct cache_extent *cache;
956         struct inode_backref *tmp, *backref;
957         struct ptr_node *node;
958         u8 filetype;
959
960         if (!rec->found_inode_item)
961                 return;
962
963         filetype = imode_to_type(rec->imode);
964         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965                 if (backref->found_dir_item && backref->found_dir_index) {
966                         if (backref->filetype != filetype)
967                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968                         if (!backref->errors && backref->found_inode_ref &&
969                             rec->nlink == rec->found_link) {
970                                 list_del(&backref->list);
971                                 free(backref);
972                         }
973                 }
974         }
975
976         if (!rec->checked || rec->merging)
977                 return;
978
979         if (S_ISDIR(rec->imode)) {
980                 if (rec->found_size != rec->isize)
981                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982                 if (rec->found_file_extent)
983                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
984         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985                 if (rec->found_dir_item)
986                         rec->errors |= I_ERR_ODD_DIR_ITEM;
987                 if (rec->found_size != rec->nbytes)
988                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989                 if (rec->nlink > 0 && !no_holes &&
990                     (rec->extent_end < rec->isize ||
991                      first_extent_gap(&rec->holes) < rec->isize))
992                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
993         }
994
995         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996                 if (rec->found_csum_item && rec->nodatasum)
997                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
998                 if (rec->some_csum_missing && !rec->nodatasum)
999                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1000         }
1001
1002         BUG_ON(rec->refs != 1);
1003         if (can_free_inode_rec(rec)) {
1004                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005                 node = container_of(cache, struct ptr_node, cache);
1006                 BUG_ON(node->data != rec);
1007                 remove_cache_extent(inode_cache, &node->cache);
1008                 free(node);
1009                 free_inode_rec(rec);
1010         }
1011 }
1012
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1014 {
1015         struct btrfs_path path;
1016         struct btrfs_key key;
1017         int ret;
1018
1019         key.objectid = BTRFS_ORPHAN_OBJECTID;
1020         key.type = BTRFS_ORPHAN_ITEM_KEY;
1021         key.offset = ino;
1022
1023         btrfs_init_path(&path);
1024         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025         btrfs_release_path(&path);
1026         if (ret > 0)
1027                 ret = -ENOENT;
1028         return ret;
1029 }
1030
1031 static int process_inode_item(struct extent_buffer *eb,
1032                               int slot, struct btrfs_key *key,
1033                               struct shared_node *active_node)
1034 {
1035         struct inode_record *rec;
1036         struct btrfs_inode_item *item;
1037
1038         rec = active_node->current;
1039         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040         if (rec->found_inode_item) {
1041                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1042                 return 1;
1043         }
1044         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045         rec->nlink = btrfs_inode_nlink(eb, item);
1046         rec->isize = btrfs_inode_size(eb, item);
1047         rec->nbytes = btrfs_inode_nbytes(eb, item);
1048         rec->imode = btrfs_inode_mode(eb, item);
1049         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1050                 rec->nodatasum = 1;
1051         rec->found_inode_item = 1;
1052         if (rec->nlink == 0)
1053                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054         maybe_free_inode_rec(&active_node->inode_cache, rec);
1055         return 0;
1056 }
1057
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1059                                                 const char *name,
1060                                                 int namelen, u64 dir)
1061 {
1062         struct inode_backref *backref;
1063
1064         list_for_each_entry(backref, &rec->backrefs, list) {
1065                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1066                         break;
1067                 if (backref->dir != dir || backref->namelen != namelen)
1068                         continue;
1069                 if (memcmp(name, backref->name, namelen))
1070                         continue;
1071                 return backref;
1072         }
1073
1074         backref = malloc(sizeof(*backref) + namelen + 1);
1075         if (!backref)
1076                 return NULL;
1077         memset(backref, 0, sizeof(*backref));
1078         backref->dir = dir;
1079         backref->namelen = namelen;
1080         memcpy(backref->name, name, namelen);
1081         backref->name[namelen] = '\0';
1082         list_add_tail(&backref->list, &rec->backrefs);
1083         return backref;
1084 }
1085
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087                              u64 ino, u64 dir, u64 index,
1088                              const char *name, int namelen,
1089                              u8 filetype, u8 itemtype, int errors)
1090 {
1091         struct inode_record *rec;
1092         struct inode_backref *backref;
1093
1094         rec = get_inode_rec(inode_cache, ino, 1);
1095         BUG_ON(IS_ERR(rec));
1096         backref = get_inode_backref(rec, name, namelen, dir);
1097         BUG_ON(!backref);
1098         if (errors)
1099                 backref->errors |= errors;
1100         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101                 if (backref->found_dir_index)
1102                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103                 if (backref->found_inode_ref && backref->index != index)
1104                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1105                 if (backref->found_dir_item && backref->filetype != filetype)
1106                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1107
1108                 backref->index = index;
1109                 backref->filetype = filetype;
1110                 backref->found_dir_index = 1;
1111         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1112                 rec->found_link++;
1113                 if (backref->found_dir_item)
1114                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115                 if (backref->found_dir_index && backref->filetype != filetype)
1116                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1117
1118                 backref->filetype = filetype;
1119                 backref->found_dir_item = 1;
1120         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122                 if (backref->found_inode_ref)
1123                         backref->errors |= REF_ERR_DUP_INODE_REF;
1124                 if (backref->found_dir_index && backref->index != index)
1125                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1126                 else
1127                         backref->index = index;
1128
1129                 backref->ref_type = itemtype;
1130                 backref->found_inode_ref = 1;
1131         } else {
1132                 BUG_ON(1);
1133         }
1134
1135         maybe_free_inode_rec(inode_cache, rec);
1136         return 0;
1137 }
1138
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140                             struct cache_tree *dst_cache)
1141 {
1142         struct inode_backref *backref;
1143         u32 dir_count = 0;
1144         int ret = 0;
1145
1146         dst->merging = 1;
1147         list_for_each_entry(backref, &src->backrefs, list) {
1148                 if (backref->found_dir_index) {
1149                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1150                                         backref->index, backref->name,
1151                                         backref->namelen, backref->filetype,
1152                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1153                 }
1154                 if (backref->found_dir_item) {
1155                         dir_count++;
1156                         add_inode_backref(dst_cache, dst->ino,
1157                                         backref->dir, 0, backref->name,
1158                                         backref->namelen, backref->filetype,
1159                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1160                 }
1161                 if (backref->found_inode_ref) {
1162                         add_inode_backref(dst_cache, dst->ino,
1163                                         backref->dir, backref->index,
1164                                         backref->name, backref->namelen, 0,
1165                                         backref->ref_type, backref->errors);
1166                 }
1167         }
1168
1169         if (src->found_dir_item)
1170                 dst->found_dir_item = 1;
1171         if (src->found_file_extent)
1172                 dst->found_file_extent = 1;
1173         if (src->found_csum_item)
1174                 dst->found_csum_item = 1;
1175         if (src->some_csum_missing)
1176                 dst->some_csum_missing = 1;
1177         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1179                 if (ret < 0)
1180                         return ret;
1181         }
1182
1183         BUG_ON(src->found_link < dir_count);
1184         dst->found_link += src->found_link - dir_count;
1185         dst->found_size += src->found_size;
1186         if (src->extent_start != (u64)-1) {
1187                 if (dst->extent_start == (u64)-1) {
1188                         dst->extent_start = src->extent_start;
1189                         dst->extent_end = src->extent_end;
1190                 } else {
1191                         if (dst->extent_end > src->extent_start)
1192                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193                         else if (dst->extent_end < src->extent_start) {
1194                                 ret = add_file_extent_hole(&dst->holes,
1195                                         dst->extent_end,
1196                                         src->extent_start - dst->extent_end);
1197                         }
1198                         if (dst->extent_end < src->extent_end)
1199                                 dst->extent_end = src->extent_end;
1200                 }
1201         }
1202
1203         dst->errors |= src->errors;
1204         if (src->found_inode_item) {
1205                 if (!dst->found_inode_item) {
1206                         dst->nlink = src->nlink;
1207                         dst->isize = src->isize;
1208                         dst->nbytes = src->nbytes;
1209                         dst->imode = src->imode;
1210                         dst->nodatasum = src->nodatasum;
1211                         dst->found_inode_item = 1;
1212                 } else {
1213                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1214                 }
1215         }
1216         dst->merging = 0;
1217
1218         return 0;
1219 }
1220
1221 static int splice_shared_node(struct shared_node *src_node,
1222                               struct shared_node *dst_node)
1223 {
1224         struct cache_extent *cache;
1225         struct ptr_node *node, *ins;
1226         struct cache_tree *src, *dst;
1227         struct inode_record *rec, *conflict;
1228         u64 current_ino = 0;
1229         int splice = 0;
1230         int ret;
1231
1232         if (--src_node->refs == 0)
1233                 splice = 1;
1234         if (src_node->current)
1235                 current_ino = src_node->current->ino;
1236
1237         src = &src_node->root_cache;
1238         dst = &dst_node->root_cache;
1239 again:
1240         cache = search_cache_extent(src, 0);
1241         while (cache) {
1242                 node = container_of(cache, struct ptr_node, cache);
1243                 rec = node->data;
1244                 cache = next_cache_extent(cache);
1245
1246                 if (splice) {
1247                         remove_cache_extent(src, &node->cache);
1248                         ins = node;
1249                 } else {
1250                         ins = malloc(sizeof(*ins));
1251                         BUG_ON(!ins);
1252                         ins->cache.start = node->cache.start;
1253                         ins->cache.size = node->cache.size;
1254                         ins->data = rec;
1255                         rec->refs++;
1256                 }
1257                 ret = insert_cache_extent(dst, &ins->cache);
1258                 if (ret == -EEXIST) {
1259                         conflict = get_inode_rec(dst, rec->ino, 1);
1260                         BUG_ON(IS_ERR(conflict));
1261                         merge_inode_recs(rec, conflict, dst);
1262                         if (rec->checked) {
1263                                 conflict->checked = 1;
1264                                 if (dst_node->current == conflict)
1265                                         dst_node->current = NULL;
1266                         }
1267                         maybe_free_inode_rec(dst, conflict);
1268                         free_inode_rec(rec);
1269                         free(ins);
1270                 } else {
1271                         BUG_ON(ret);
1272                 }
1273         }
1274
1275         if (src == &src_node->root_cache) {
1276                 src = &src_node->inode_cache;
1277                 dst = &dst_node->inode_cache;
1278                 goto again;
1279         }
1280
1281         if (current_ino > 0 && (!dst_node->current ||
1282             current_ino > dst_node->current->ino)) {
1283                 if (dst_node->current) {
1284                         dst_node->current->checked = 1;
1285                         maybe_free_inode_rec(dst, dst_node->current);
1286                 }
1287                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288                 BUG_ON(IS_ERR(dst_node->current));
1289         }
1290         return 0;
1291 }
1292
1293 static void free_inode_ptr(struct cache_extent *cache)
1294 {
1295         struct ptr_node *node;
1296         struct inode_record *rec;
1297
1298         node = container_of(cache, struct ptr_node, cache);
1299         rec = node->data;
1300         free_inode_rec(rec);
1301         free(node);
1302 }
1303
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1305
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1307                                             u64 bytenr)
1308 {
1309         struct cache_extent *cache;
1310         struct shared_node *node;
1311
1312         cache = lookup_cache_extent(shared, bytenr, 1);
1313         if (cache) {
1314                 node = container_of(cache, struct shared_node, cache);
1315                 return node;
1316         }
1317         return NULL;
1318 }
1319
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1321 {
1322         int ret;
1323         struct shared_node *node;
1324
1325         node = calloc(1, sizeof(*node));
1326         if (!node)
1327                 return -ENOMEM;
1328         node->cache.start = bytenr;
1329         node->cache.size = 1;
1330         cache_tree_init(&node->root_cache);
1331         cache_tree_init(&node->inode_cache);
1332         node->refs = refs;
1333
1334         ret = insert_cache_extent(shared, &node->cache);
1335
1336         return ret;
1337 }
1338
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340                              struct walk_control *wc, int level)
1341 {
1342         struct shared_node *node;
1343         struct shared_node *dest;
1344         int ret;
1345
1346         if (level == wc->active_node)
1347                 return 0;
1348
1349         BUG_ON(wc->active_node <= level);
1350         node = find_shared_node(&wc->shared, bytenr);
1351         if (!node) {
1352                 ret = add_shared_node(&wc->shared, bytenr, refs);
1353                 BUG_ON(ret);
1354                 node = find_shared_node(&wc->shared, bytenr);
1355                 wc->nodes[level] = node;
1356                 wc->active_node = level;
1357                 return 0;
1358         }
1359
1360         if (wc->root_level == wc->active_node &&
1361             btrfs_root_refs(&root->root_item) == 0) {
1362                 if (--node->refs == 0) {
1363                         free_inode_recs_tree(&node->root_cache);
1364                         free_inode_recs_tree(&node->inode_cache);
1365                         remove_cache_extent(&wc->shared, &node->cache);
1366                         free(node);
1367                 }
1368                 return 1;
1369         }
1370
1371         dest = wc->nodes[wc->active_node];
1372         splice_shared_node(node, dest);
1373         if (node->refs == 0) {
1374                 remove_cache_extent(&wc->shared, &node->cache);
1375                 free(node);
1376         }
1377         return 1;
1378 }
1379
1380 static int leave_shared_node(struct btrfs_root *root,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int i;
1386
1387         if (level == wc->root_level)
1388                 return 0;
1389
1390         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1391                 if (wc->nodes[i])
1392                         break;
1393         }
1394         BUG_ON(i >= BTRFS_MAX_LEVEL);
1395
1396         node = wc->nodes[wc->active_node];
1397         wc->nodes[wc->active_node] = NULL;
1398         wc->active_node = i;
1399
1400         dest = wc->nodes[wc->active_node];
1401         if (wc->active_node < wc->root_level ||
1402             btrfs_root_refs(&root->root_item) > 0) {
1403                 BUG_ON(node->refs <= 1);
1404                 splice_shared_node(node, dest);
1405         } else {
1406                 BUG_ON(node->refs < 2);
1407                 node->refs--;
1408         }
1409         return 0;
1410 }
1411
1412 /*
1413  * Returns:
1414  * < 0 - on error
1415  * 1   - if the root with id child_root_id is a child of root parent_root_id
1416  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1417  *       has other root(s) as parent(s)
1418  * 2   - if the root child_root_id doesn't have any parent roots
1419  */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1421                          u64 child_root_id)
1422 {
1423         struct btrfs_path path;
1424         struct btrfs_key key;
1425         struct extent_buffer *leaf;
1426         int has_parent = 0;
1427         int ret;
1428
1429         btrfs_init_path(&path);
1430
1431         key.objectid = parent_root_id;
1432         key.type = BTRFS_ROOT_REF_KEY;
1433         key.offset = child_root_id;
1434         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1435                                 0, 0);
1436         if (ret < 0)
1437                 return ret;
1438         btrfs_release_path(&path);
1439         if (!ret)
1440                 return 1;
1441
1442         key.objectid = child_root_id;
1443         key.type = BTRFS_ROOT_BACKREF_KEY;
1444         key.offset = 0;
1445         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1446                                 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449
1450         while (1) {
1451                 leaf = path.nodes[0];
1452                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1454                         if (ret)
1455                                 break;
1456                         leaf = path.nodes[0];
1457                 }
1458
1459                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460                 if (key.objectid != child_root_id ||
1461                     key.type != BTRFS_ROOT_BACKREF_KEY)
1462                         break;
1463
1464                 has_parent = 1;
1465
1466                 if (key.offset == parent_root_id) {
1467                         btrfs_release_path(&path);
1468                         return 1;
1469                 }
1470
1471                 path.slots[0]++;
1472         }
1473 out:
1474         btrfs_release_path(&path);
1475         if (ret < 0)
1476                 return ret;
1477         return has_parent ? 0 : 2;
1478 }
1479
1480 static int process_dir_item(struct extent_buffer *eb,
1481                             int slot, struct btrfs_key *key,
1482                             struct shared_node *active_node)
1483 {
1484         u32 total;
1485         u32 cur = 0;
1486         u32 len;
1487         u32 name_len;
1488         u32 data_len;
1489         int error;
1490         int nritems = 0;
1491         u8 filetype;
1492         struct btrfs_dir_item *di;
1493         struct inode_record *rec;
1494         struct cache_tree *root_cache;
1495         struct cache_tree *inode_cache;
1496         struct btrfs_key location;
1497         char namebuf[BTRFS_NAME_LEN];
1498
1499         root_cache = &active_node->root_cache;
1500         inode_cache = &active_node->inode_cache;
1501         rec = active_node->current;
1502         rec->found_dir_item = 1;
1503
1504         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1505         total = btrfs_item_size_nr(eb, slot);
1506         while (cur < total) {
1507                 nritems++;
1508                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1509                 name_len = btrfs_dir_name_len(eb, di);
1510                 data_len = btrfs_dir_data_len(eb, di);
1511                 filetype = btrfs_dir_type(eb, di);
1512
1513                 rec->found_size += name_len;
1514                 if (cur + sizeof(*di) + name_len > total ||
1515                     name_len > BTRFS_NAME_LEN) {
1516                         error = REF_ERR_NAME_TOO_LONG;
1517
1518                         if (cur + sizeof(*di) > total)
1519                                 break;
1520                         len = min_t(u32, total - cur - sizeof(*di),
1521                                     BTRFS_NAME_LEN);
1522                 } else {
1523                         len = name_len;
1524                         error = 0;
1525                 }
1526
1527                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1528
1529                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1530                     key->offset != btrfs_name_hash(namebuf, len)) {
1531                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1532                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1533                         key->objectid, key->offset, namebuf, len, filetype,
1534                         key->offset, btrfs_name_hash(namebuf, len));
1535                 }
1536
1537                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1538                         add_inode_backref(inode_cache, location.objectid,
1539                                           key->objectid, key->offset, namebuf,
1540                                           len, filetype, key->type, error);
1541                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1542                         add_inode_backref(root_cache, location.objectid,
1543                                           key->objectid, key->offset,
1544                                           namebuf, len, filetype,
1545                                           key->type, error);
1546                 } else {
1547                         fprintf(stderr, "invalid location in dir item %u\n",
1548                                 location.type);
1549                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1550                                           key->objectid, key->offset, namebuf,
1551                                           len, filetype, key->type, error);
1552                 }
1553
1554                 len = sizeof(*di) + name_len + data_len;
1555                 di = (struct btrfs_dir_item *)((char *)di + len);
1556                 cur += len;
1557         }
1558         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1559                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1560
1561         return 0;
1562 }
1563
1564 static int process_inode_ref(struct extent_buffer *eb,
1565                              int slot, struct btrfs_key *key,
1566                              struct shared_node *active_node)
1567 {
1568         u32 total;
1569         u32 cur = 0;
1570         u32 len;
1571         u32 name_len;
1572         u64 index;
1573         int error;
1574         struct cache_tree *inode_cache;
1575         struct btrfs_inode_ref *ref;
1576         char namebuf[BTRFS_NAME_LEN];
1577
1578         inode_cache = &active_node->inode_cache;
1579
1580         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1581         total = btrfs_item_size_nr(eb, slot);
1582         while (cur < total) {
1583                 name_len = btrfs_inode_ref_name_len(eb, ref);
1584                 index = btrfs_inode_ref_index(eb, ref);
1585
1586                 /* inode_ref + namelen should not cross item boundary */
1587                 if (cur + sizeof(*ref) + name_len > total ||
1588                     name_len > BTRFS_NAME_LEN) {
1589                         if (total < cur + sizeof(*ref))
1590                                 break;
1591
1592                         /* Still try to read out the remaining part */
1593                         len = min_t(u32, total - cur - sizeof(*ref),
1594                                     BTRFS_NAME_LEN);
1595                         error = REF_ERR_NAME_TOO_LONG;
1596                 } else {
1597                         len = name_len;
1598                         error = 0;
1599                 }
1600
1601                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1602                 add_inode_backref(inode_cache, key->objectid, key->offset,
1603                                   index, namebuf, len, 0, key->type, error);
1604
1605                 len = sizeof(*ref) + name_len;
1606                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1607                 cur += len;
1608         }
1609         return 0;
1610 }
1611
1612 static int process_inode_extref(struct extent_buffer *eb,
1613                                 int slot, struct btrfs_key *key,
1614                                 struct shared_node *active_node)
1615 {
1616         u32 total;
1617         u32 cur = 0;
1618         u32 len;
1619         u32 name_len;
1620         u64 index;
1621         u64 parent;
1622         int error;
1623         struct cache_tree *inode_cache;
1624         struct btrfs_inode_extref *extref;
1625         char namebuf[BTRFS_NAME_LEN];
1626
1627         inode_cache = &active_node->inode_cache;
1628
1629         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1630         total = btrfs_item_size_nr(eb, slot);
1631         while (cur < total) {
1632                 name_len = btrfs_inode_extref_name_len(eb, extref);
1633                 index = btrfs_inode_extref_index(eb, extref);
1634                 parent = btrfs_inode_extref_parent(eb, extref);
1635                 if (name_len <= BTRFS_NAME_LEN) {
1636                         len = name_len;
1637                         error = 0;
1638                 } else {
1639                         len = BTRFS_NAME_LEN;
1640                         error = REF_ERR_NAME_TOO_LONG;
1641                 }
1642                 read_extent_buffer(eb, namebuf,
1643                                    (unsigned long)(extref + 1), len);
1644                 add_inode_backref(inode_cache, key->objectid, parent,
1645                                   index, namebuf, len, 0, key->type, error);
1646
1647                 len = sizeof(*extref) + name_len;
1648                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1649                 cur += len;
1650         }
1651         return 0;
1652
1653 }
1654
1655 static int count_csum_range(struct btrfs_root *root, u64 start,
1656                             u64 len, u64 *found)
1657 {
1658         struct btrfs_key key;
1659         struct btrfs_path path;
1660         struct extent_buffer *leaf;
1661         int ret;
1662         size_t size;
1663         *found = 0;
1664         u64 csum_end;
1665         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1666
1667         btrfs_init_path(&path);
1668
1669         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1670         key.offset = start;
1671         key.type = BTRFS_EXTENT_CSUM_KEY;
1672
1673         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1674                                 &key, &path, 0, 0);
1675         if (ret < 0)
1676                 goto out;
1677         if (ret > 0 && path.slots[0] > 0) {
1678                 leaf = path.nodes[0];
1679                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1680                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1681                     key.type == BTRFS_EXTENT_CSUM_KEY)
1682                         path.slots[0]--;
1683         }
1684
1685         while (len > 0) {
1686                 leaf = path.nodes[0];
1687                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1688                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1689                         if (ret > 0)
1690                                 break;
1691                         else if (ret < 0)
1692                                 goto out;
1693                         leaf = path.nodes[0];
1694                 }
1695
1696                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1697                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1698                     key.type != BTRFS_EXTENT_CSUM_KEY)
1699                         break;
1700
1701                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1702                 if (key.offset >= start + len)
1703                         break;
1704
1705                 if (key.offset > start)
1706                         start = key.offset;
1707
1708                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1709                 csum_end = key.offset + (size / csum_size) *
1710                            root->fs_info->sectorsize;
1711                 if (csum_end > start) {
1712                         size = min(csum_end - start, len);
1713                         len -= size;
1714                         start += size;
1715                         *found += size;
1716                 }
1717
1718                 path.slots[0]++;
1719         }
1720 out:
1721         btrfs_release_path(&path);
1722         if (ret < 0)
1723                 return ret;
1724         return 0;
1725 }
1726
1727 static int process_file_extent(struct btrfs_root *root,
1728                                 struct extent_buffer *eb,
1729                                 int slot, struct btrfs_key *key,
1730                                 struct shared_node *active_node)
1731 {
1732         struct inode_record *rec;
1733         struct btrfs_file_extent_item *fi;
1734         u64 num_bytes = 0;
1735         u64 disk_bytenr = 0;
1736         u64 extent_offset = 0;
1737         u64 mask = root->fs_info->sectorsize - 1;
1738         int extent_type;
1739         int ret;
1740
1741         rec = active_node->current;
1742         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743         rec->found_file_extent = 1;
1744
1745         if (rec->extent_start == (u64)-1) {
1746                 rec->extent_start = key->offset;
1747                 rec->extent_end = key->offset;
1748         }
1749
1750         if (rec->extent_end > key->offset)
1751                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752         else if (rec->extent_end < key->offset) {
1753                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754                                            key->offset - rec->extent_end);
1755                 if (ret < 0)
1756                         return ret;
1757         }
1758
1759         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760         extent_type = btrfs_file_extent_type(eb, fi);
1761
1762         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1764                 if (num_bytes == 0)
1765                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766                 rec->found_size += num_bytes;
1767                 num_bytes = (num_bytes + mask) & ~mask;
1768         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772                 extent_offset = btrfs_file_extent_offset(eb, fi);
1773                 if (num_bytes == 0 || (num_bytes & mask))
1774                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775                 if (num_bytes + extent_offset >
1776                     btrfs_file_extent_ram_bytes(eb, fi))
1777                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779                     (btrfs_file_extent_compression(eb, fi) ||
1780                      btrfs_file_extent_encryption(eb, fi) ||
1781                      btrfs_file_extent_other_encoding(eb, fi)))
1782                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783                 if (disk_bytenr > 0)
1784                         rec->found_size += num_bytes;
1785         } else {
1786                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1787         }
1788         rec->extent_end = key->offset + num_bytes;
1789
1790         /*
1791          * The data reloc tree will copy full extents into its inode and then
1792          * copy the corresponding csums.  Because the extent it copied could be
1793          * a preallocated extent that hasn't been written to yet there may be no
1794          * csums to copy, ergo we won't have csums for our file extent.  This is
1795          * ok so just don't bother checking csums if the inode belongs to the
1796          * data reloc tree.
1797          */
1798         if (disk_bytenr > 0 &&
1799             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1800                 u64 found;
1801                 if (btrfs_file_extent_compression(eb, fi))
1802                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1803                 else
1804                         disk_bytenr += extent_offset;
1805
1806                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1807                 if (ret < 0)
1808                         return ret;
1809                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1810                         if (found > 0)
1811                                 rec->found_csum_item = 1;
1812                         if (found < num_bytes)
1813                                 rec->some_csum_missing = 1;
1814                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1815                         if (found > 0)
1816                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1817                 }
1818         }
1819         return 0;
1820 }
1821
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823                             struct walk_control *wc)
1824 {
1825         struct btrfs_key key;
1826         u32 nritems;
1827         int i;
1828         int ret = 0;
1829         struct cache_tree *inode_cache;
1830         struct shared_node *active_node;
1831
1832         if (wc->root_level == wc->active_node &&
1833             btrfs_root_refs(&root->root_item) == 0)
1834                 return 0;
1835
1836         active_node = wc->nodes[wc->active_node];
1837         inode_cache = &active_node->inode_cache;
1838         nritems = btrfs_header_nritems(eb);
1839         for (i = 0; i < nritems; i++) {
1840                 btrfs_item_key_to_cpu(eb, &key, i);
1841
1842                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1843                         continue;
1844                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1845                         continue;
1846
1847                 if (active_node->current == NULL ||
1848                     active_node->current->ino < key.objectid) {
1849                         if (active_node->current) {
1850                                 active_node->current->checked = 1;
1851                                 maybe_free_inode_rec(inode_cache,
1852                                                      active_node->current);
1853                         }
1854                         active_node->current = get_inode_rec(inode_cache,
1855                                                              key.objectid, 1);
1856                         BUG_ON(IS_ERR(active_node->current));
1857                 }
1858                 switch (key.type) {
1859                 case BTRFS_DIR_ITEM_KEY:
1860                 case BTRFS_DIR_INDEX_KEY:
1861                         ret = process_dir_item(eb, i, &key, active_node);
1862                         break;
1863                 case BTRFS_INODE_REF_KEY:
1864                         ret = process_inode_ref(eb, i, &key, active_node);
1865                         break;
1866                 case BTRFS_INODE_EXTREF_KEY:
1867                         ret = process_inode_extref(eb, i, &key, active_node);
1868                         break;
1869                 case BTRFS_INODE_ITEM_KEY:
1870                         ret = process_inode_item(eb, i, &key, active_node);
1871                         break;
1872                 case BTRFS_EXTENT_DATA_KEY:
1873                         ret = process_file_extent(root, eb, i, &key,
1874                                                   active_node);
1875                         break;
1876                 default:
1877                         break;
1878                 };
1879         }
1880         return ret;
1881 }
1882
1883 struct node_refs {
1884         u64 bytenr[BTRFS_MAX_LEVEL];
1885         u64 refs[BTRFS_MAX_LEVEL];
1886         int need_check[BTRFS_MAX_LEVEL];
1887 };
1888
1889 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1890                              struct node_refs *nrefs, u64 level);
1891 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1892                             unsigned int ext_ref);
1893
1894 /*
1895  * Returns >0  Found error, not fatal, should continue
1896  * Returns <0  Fatal error, must exit the whole check
1897  * Returns 0   No errors found
1898  */
1899 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1900                                struct node_refs *nrefs, int *level, int ext_ref)
1901 {
1902         struct extent_buffer *cur = path->nodes[0];
1903         struct btrfs_key key;
1904         u64 cur_bytenr;
1905         u32 nritems;
1906         u64 first_ino = 0;
1907         int root_level = btrfs_header_level(root->node);
1908         int i;
1909         int ret = 0; /* Final return value */
1910         int err = 0; /* Positive error bitmap */
1911
1912         cur_bytenr = cur->start;
1913
1914         /* skip to first inode item or the first inode number change */
1915         nritems = btrfs_header_nritems(cur);
1916         for (i = 0; i < nritems; i++) {
1917                 btrfs_item_key_to_cpu(cur, &key, i);
1918                 if (i == 0)
1919                         first_ino = key.objectid;
1920                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1921                     (first_ino && first_ino != key.objectid))
1922                         break;
1923         }
1924         if (i == nritems) {
1925                 path->slots[0] = nritems;
1926                 return 0;
1927         }
1928         path->slots[0] = i;
1929
1930 again:
1931         err |= check_inode_item(root, path, ext_ref);
1932
1933         if (err & LAST_ITEM)
1934                 goto out;
1935
1936         /* still have inode items in thie leaf */
1937         if (cur->start == cur_bytenr)
1938                 goto again;
1939
1940         /*
1941          * we have switched to another leaf, above nodes may
1942          * have changed, here walk down the path, if a node
1943          * or leaf is shared, check whether we can skip this
1944          * node or leaf.
1945          */
1946         for (i = root_level; i >= 0; i--) {
1947                 if (path->nodes[i]->start == nrefs->bytenr[i])
1948                         continue;
1949
1950                 ret = update_nodes_refs(root,
1951                                 path->nodes[i]->start,
1952                                 nrefs, i);
1953                 if (ret)
1954                         goto out;
1955
1956                 if (!nrefs->need_check[i]) {
1957                         *level += 1;
1958                         break;
1959                 }
1960         }
1961
1962         for (i = 0; i < *level; i++) {
1963                 free_extent_buffer(path->nodes[i]);
1964                 path->nodes[i] = NULL;
1965         }
1966 out:
1967         err &= ~LAST_ITEM;
1968         if (err && !ret)
1969                 ret = err;
1970         return ret;
1971 }
1972
1973 static void reada_walk_down(struct btrfs_root *root,
1974                             struct extent_buffer *node, int slot)
1975 {
1976         struct btrfs_fs_info *fs_info = root->fs_info;
1977         u64 bytenr;
1978         u64 ptr_gen;
1979         u32 nritems;
1980         int i;
1981         int level;
1982
1983         level = btrfs_header_level(node);
1984         if (level != 1)
1985                 return;
1986
1987         nritems = btrfs_header_nritems(node);
1988         for (i = slot; i < nritems; i++) {
1989                 bytenr = btrfs_node_blockptr(node, i);
1990                 ptr_gen = btrfs_node_ptr_generation(node, i);
1991                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1992         }
1993 }
1994
1995 /*
1996  * Check the child node/leaf by the following condition:
1997  * 1. the first item key of the node/leaf should be the same with the one
1998  *    in parent.
1999  * 2. block in parent node should match the child node/leaf.
2000  * 3. generation of parent node and child's header should be consistent.
2001  *
2002  * Or the child node/leaf pointed by the key in parent is not valid.
2003  *
2004  * We hope to check leaf owner too, but since subvol may share leaves,
2005  * which makes leaf owner check not so strong, key check should be
2006  * sufficient enough for that case.
2007  */
2008 static int check_child_node(struct extent_buffer *parent, int slot,
2009                             struct extent_buffer *child)
2010 {
2011         struct btrfs_key parent_key;
2012         struct btrfs_key child_key;
2013         int ret = 0;
2014
2015         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2016         if (btrfs_header_level(child) == 0)
2017                 btrfs_item_key_to_cpu(child, &child_key, 0);
2018         else
2019                 btrfs_node_key_to_cpu(child, &child_key, 0);
2020
2021         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2022                 ret = -EINVAL;
2023                 fprintf(stderr,
2024                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2025                         parent_key.objectid, parent_key.type, parent_key.offset,
2026                         child_key.objectid, child_key.type, child_key.offset);
2027         }
2028         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2029                 ret = -EINVAL;
2030                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2031                         btrfs_node_blockptr(parent, slot),
2032                         btrfs_header_bytenr(child));
2033         }
2034         if (btrfs_node_ptr_generation(parent, slot) !=
2035             btrfs_header_generation(child)) {
2036                 ret = -EINVAL;
2037                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2038                         btrfs_header_generation(child),
2039                         btrfs_node_ptr_generation(parent, slot));
2040         }
2041         return ret;
2042 }
2043
2044 /*
2045  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2046  * in every fs or file tree check. Here we find its all root ids, and only check
2047  * it in the fs or file tree which has the smallest root id.
2048  */
2049 static int need_check(struct btrfs_root *root, struct ulist *roots)
2050 {
2051         struct rb_node *node;
2052         struct ulist_node *u;
2053
2054         if (roots->nnodes == 1)
2055                 return 1;
2056
2057         node = rb_first(&roots->root);
2058         u = rb_entry(node, struct ulist_node, rb_node);
2059         /*
2060          * current root id is not smallest, we skip it and let it be checked
2061          * in the fs or file tree who hash the smallest root id.
2062          */
2063         if (root->objectid != u->val)
2064                 return 0;
2065
2066         return 1;
2067 }
2068
2069 /*
2070  * for a tree node or leaf, we record its reference count, so later if we still
2071  * process this node or leaf, don't need to compute its reference count again.
2072  */
2073 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2074                              struct node_refs *nrefs, u64 level)
2075 {
2076         int check, ret;
2077         u64 refs;
2078         struct ulist *roots;
2079
2080         if (nrefs->bytenr[level] != bytenr) {
2081                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2082                                        level, 1, &refs, NULL);
2083                 if (ret < 0)
2084                         return ret;
2085
2086                 nrefs->bytenr[level] = bytenr;
2087                 nrefs->refs[level] = refs;
2088                 if (refs > 1) {
2089                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2090                                                    0, &roots);
2091                         if (ret)
2092                                 return -EIO;
2093
2094                         check = need_check(root, roots);
2095                         ulist_free(roots);
2096                         nrefs->need_check[level] = check;
2097                 } else {
2098                         nrefs->need_check[level] = 1;
2099                 }
2100         }
2101
2102         return 0;
2103 }
2104
2105 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2106                           struct walk_control *wc, int *level,
2107                           struct node_refs *nrefs)
2108 {
2109         enum btrfs_tree_block_status status;
2110         u64 bytenr;
2111         u64 ptr_gen;
2112         struct btrfs_fs_info *fs_info = root->fs_info;
2113         struct extent_buffer *next;
2114         struct extent_buffer *cur;
2115         int ret, err = 0;
2116         u64 refs;
2117
2118         WARN_ON(*level < 0);
2119         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2120
2121         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2122                 refs = nrefs->refs[*level];
2123                 ret = 0;
2124         } else {
2125                 ret = btrfs_lookup_extent_info(NULL, root,
2126                                        path->nodes[*level]->start,
2127                                        *level, 1, &refs, NULL);
2128                 if (ret < 0) {
2129                         err = ret;
2130                         goto out;
2131                 }
2132                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2133                 nrefs->refs[*level] = refs;
2134         }
2135
2136         if (refs > 1) {
2137                 ret = enter_shared_node(root, path->nodes[*level]->start,
2138                                         refs, wc, *level);
2139                 if (ret > 0) {
2140                         err = ret;
2141                         goto out;
2142                 }
2143         }
2144
2145         while (*level >= 0) {
2146                 WARN_ON(*level < 0);
2147                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2148                 cur = path->nodes[*level];
2149
2150                 if (btrfs_header_level(cur) != *level)
2151                         WARN_ON(1);
2152
2153                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2154                         break;
2155                 if (*level == 0) {
2156                         ret = process_one_leaf(root, cur, wc);
2157                         if (ret < 0)
2158                                 err = ret;
2159                         break;
2160                 }
2161                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2162                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2163
2164                 if (bytenr == nrefs->bytenr[*level - 1]) {
2165                         refs = nrefs->refs[*level - 1];
2166                 } else {
2167                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2168                                         *level - 1, 1, &refs, NULL);
2169                         if (ret < 0) {
2170                                 refs = 0;
2171                         } else {
2172                                 nrefs->bytenr[*level - 1] = bytenr;
2173                                 nrefs->refs[*level - 1] = refs;
2174                         }
2175                 }
2176
2177                 if (refs > 1) {
2178                         ret = enter_shared_node(root, bytenr, refs,
2179                                                 wc, *level - 1);
2180                         if (ret > 0) {
2181                                 path->slots[*level]++;
2182                                 continue;
2183                         }
2184                 }
2185
2186                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2187                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2188                         free_extent_buffer(next);
2189                         reada_walk_down(root, cur, path->slots[*level]);
2190                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2191                         if (!extent_buffer_uptodate(next)) {
2192                                 struct btrfs_key node_key;
2193
2194                                 btrfs_node_key_to_cpu(path->nodes[*level],
2195                                                       &node_key,
2196                                                       path->slots[*level]);
2197                                 btrfs_add_corrupt_extent_record(root->fs_info,
2198                                                 &node_key,
2199                                                 path->nodes[*level]->start,
2200                                                 root->fs_info->nodesize,
2201                                                 *level);
2202                                 err = -EIO;
2203                                 goto out;
2204                         }
2205                 }
2206
2207                 ret = check_child_node(cur, path->slots[*level], next);
2208                 if (ret) {
2209                         free_extent_buffer(next);
2210                         err = ret;
2211                         goto out;
2212                 }
2213
2214                 if (btrfs_is_leaf(next))
2215                         status = btrfs_check_leaf(root, NULL, next);
2216                 else
2217                         status = btrfs_check_node(root, NULL, next);
2218                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2219                         free_extent_buffer(next);
2220                         err = -EIO;
2221                         goto out;
2222                 }
2223
2224                 *level = *level - 1;
2225                 free_extent_buffer(path->nodes[*level]);
2226                 path->nodes[*level] = next;
2227                 path->slots[*level] = 0;
2228         }
2229 out:
2230         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2231         return err;
2232 }
2233
2234 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2235                             unsigned int ext_ref);
2236
2237 /*
2238  * Returns >0  Found error, should continue
2239  * Returns <0  Fatal error, must exit the whole check
2240  * Returns 0   No errors found
2241  */
2242 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2243                              int *level, struct node_refs *nrefs, int ext_ref)
2244 {
2245         enum btrfs_tree_block_status status;
2246         u64 bytenr;
2247         u64 ptr_gen;
2248         struct btrfs_fs_info *fs_info = root->fs_info;
2249         struct extent_buffer *next;
2250         struct extent_buffer *cur;
2251         int ret;
2252
2253         WARN_ON(*level < 0);
2254         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2255
2256         ret = update_nodes_refs(root, path->nodes[*level]->start,
2257                                 nrefs, *level);
2258         if (ret < 0)
2259                 return ret;
2260
2261         while (*level >= 0) {
2262                 WARN_ON(*level < 0);
2263                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2264                 cur = path->nodes[*level];
2265
2266                 if (btrfs_header_level(cur) != *level)
2267                         WARN_ON(1);
2268
2269                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2270                         break;
2271                 /* Don't forgot to check leaf/node validation */
2272                 if (*level == 0) {
2273                         ret = btrfs_check_leaf(root, NULL, cur);
2274                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2275                                 ret = -EIO;
2276                                 break;
2277                         }
2278                         ret = process_one_leaf_v2(root, path, nrefs,
2279                                                   level, ext_ref);
2280                         break;
2281                 } else {
2282                         ret = btrfs_check_node(root, NULL, cur);
2283                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2284                                 ret = -EIO;
2285                                 break;
2286                         }
2287                 }
2288                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2289                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2290
2291                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2292                 if (ret)
2293                         break;
2294                 if (!nrefs->need_check[*level - 1]) {
2295                         path->slots[*level]++;
2296                         continue;
2297                 }
2298
2299                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2300                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2301                         free_extent_buffer(next);
2302                         reada_walk_down(root, cur, path->slots[*level]);
2303                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2304                         if (!extent_buffer_uptodate(next)) {
2305                                 struct btrfs_key node_key;
2306
2307                                 btrfs_node_key_to_cpu(path->nodes[*level],
2308                                                       &node_key,
2309                                                       path->slots[*level]);
2310                                 btrfs_add_corrupt_extent_record(fs_info,
2311                                                 &node_key,
2312                                                 path->nodes[*level]->start,
2313                                                 fs_info->nodesize,
2314                                                 *level);
2315                                 ret = -EIO;
2316                                 break;
2317                         }
2318                 }
2319
2320                 ret = check_child_node(cur, path->slots[*level], next);
2321                 if (ret < 0) 
2322                         break;
2323
2324                 if (btrfs_is_leaf(next))
2325                         status = btrfs_check_leaf(root, NULL, next);
2326                 else
2327                         status = btrfs_check_node(root, NULL, next);
2328                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2329                         free_extent_buffer(next);
2330                         ret = -EIO;
2331                         break;
2332                 }
2333
2334                 *level = *level - 1;
2335                 free_extent_buffer(path->nodes[*level]);
2336                 path->nodes[*level] = next;
2337                 path->slots[*level] = 0;
2338         }
2339         return ret;
2340 }
2341
2342 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2343                         struct walk_control *wc, int *level)
2344 {
2345         int i;
2346         struct extent_buffer *leaf;
2347
2348         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349                 leaf = path->nodes[i];
2350                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2351                         path->slots[i]++;
2352                         *level = i;
2353                         return 0;
2354                 } else {
2355                         free_extent_buffer(path->nodes[*level]);
2356                         path->nodes[*level] = NULL;
2357                         BUG_ON(*level > wc->active_node);
2358                         if (*level == wc->active_node)
2359                                 leave_shared_node(root, wc, *level);
2360                         *level = i + 1;
2361                 }
2362         }
2363         return 1;
2364 }
2365
2366 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2367                            int *level)
2368 {
2369         int i;
2370         struct extent_buffer *leaf;
2371
2372         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2373                 leaf = path->nodes[i];
2374                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2375                         path->slots[i]++;
2376                         *level = i;
2377                         return 0;
2378                 } else {
2379                         free_extent_buffer(path->nodes[*level]);
2380                         path->nodes[*level] = NULL;
2381                         *level = i + 1;
2382                 }
2383         }
2384         return 1;
2385 }
2386
2387 static int check_root_dir(struct inode_record *rec)
2388 {
2389         struct inode_backref *backref;
2390         int ret = -1;
2391
2392         if (!rec->found_inode_item || rec->errors)
2393                 goto out;
2394         if (rec->nlink != 1 || rec->found_link != 0)
2395                 goto out;
2396         if (list_empty(&rec->backrefs))
2397                 goto out;
2398         backref = to_inode_backref(rec->backrefs.next);
2399         if (!backref->found_inode_ref)
2400                 goto out;
2401         if (backref->index != 0 || backref->namelen != 2 ||
2402             memcmp(backref->name, "..", 2))
2403                 goto out;
2404         if (backref->found_dir_index || backref->found_dir_item)
2405                 goto out;
2406         ret = 0;
2407 out:
2408         return ret;
2409 }
2410
2411 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2412                               struct btrfs_root *root, struct btrfs_path *path,
2413                               struct inode_record *rec)
2414 {
2415         struct btrfs_inode_item *ei;
2416         struct btrfs_key key;
2417         int ret;
2418
2419         key.objectid = rec->ino;
2420         key.type = BTRFS_INODE_ITEM_KEY;
2421         key.offset = (u64)-1;
2422
2423         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2424         if (ret < 0)
2425                 goto out;
2426         if (ret) {
2427                 if (!path->slots[0]) {
2428                         ret = -ENOENT;
2429                         goto out;
2430                 }
2431                 path->slots[0]--;
2432                 ret = 0;
2433         }
2434         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2435         if (key.objectid != rec->ino) {
2436                 ret = -ENOENT;
2437                 goto out;
2438         }
2439
2440         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2441                             struct btrfs_inode_item);
2442         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2443         btrfs_mark_buffer_dirty(path->nodes[0]);
2444         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2445         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2446                root->root_key.objectid);
2447 out:
2448         btrfs_release_path(path);
2449         return ret;
2450 }
2451
2452 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2453                                     struct btrfs_root *root,
2454                                     struct btrfs_path *path,
2455                                     struct inode_record *rec)
2456 {
2457         int ret;
2458
2459         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2460         btrfs_release_path(path);
2461         if (!ret)
2462                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2463         return ret;
2464 }
2465
2466 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2467                                struct btrfs_root *root,
2468                                struct btrfs_path *path,
2469                                struct inode_record *rec)
2470 {
2471         struct btrfs_inode_item *ei;
2472         struct btrfs_key key;
2473         int ret = 0;
2474
2475         key.objectid = rec->ino;
2476         key.type = BTRFS_INODE_ITEM_KEY;
2477         key.offset = 0;
2478
2479         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2480         if (ret) {
2481                 if (ret > 0)
2482                         ret = -ENOENT;
2483                 goto out;
2484         }
2485
2486         /* Since ret == 0, no need to check anything */
2487         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2488                             struct btrfs_inode_item);
2489         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2490         btrfs_mark_buffer_dirty(path->nodes[0]);
2491         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2492         printf("reset nbytes for ino %llu root %llu\n",
2493                rec->ino, root->root_key.objectid);
2494 out:
2495         btrfs_release_path(path);
2496         return ret;
2497 }
2498
2499 static int add_missing_dir_index(struct btrfs_root *root,
2500                                  struct cache_tree *inode_cache,
2501                                  struct inode_record *rec,
2502                                  struct inode_backref *backref)
2503 {
2504         struct btrfs_path path;
2505         struct btrfs_trans_handle *trans;
2506         struct btrfs_dir_item *dir_item;
2507         struct extent_buffer *leaf;
2508         struct btrfs_key key;
2509         struct btrfs_disk_key disk_key;
2510         struct inode_record *dir_rec;
2511         unsigned long name_ptr;
2512         u32 data_size = sizeof(*dir_item) + backref->namelen;
2513         int ret;
2514
2515         trans = btrfs_start_transaction(root, 1);
2516         if (IS_ERR(trans))
2517                 return PTR_ERR(trans);
2518
2519         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2520                 (unsigned long long)rec->ino);
2521
2522         btrfs_init_path(&path);
2523         key.objectid = backref->dir;
2524         key.type = BTRFS_DIR_INDEX_KEY;
2525         key.offset = backref->index;
2526         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2527         BUG_ON(ret);
2528
2529         leaf = path.nodes[0];
2530         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2531
2532         disk_key.objectid = cpu_to_le64(rec->ino);
2533         disk_key.type = BTRFS_INODE_ITEM_KEY;
2534         disk_key.offset = 0;
2535
2536         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2537         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2538         btrfs_set_dir_data_len(leaf, dir_item, 0);
2539         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2540         name_ptr = (unsigned long)(dir_item + 1);
2541         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2542         btrfs_mark_buffer_dirty(leaf);
2543         btrfs_release_path(&path);
2544         btrfs_commit_transaction(trans, root);
2545
2546         backref->found_dir_index = 1;
2547         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2548         BUG_ON(IS_ERR(dir_rec));
2549         if (!dir_rec)
2550                 return 0;
2551         dir_rec->found_size += backref->namelen;
2552         if (dir_rec->found_size == dir_rec->isize &&
2553             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2554                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2555         if (dir_rec->found_size != dir_rec->isize)
2556                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2557
2558         return 0;
2559 }
2560
2561 static int delete_dir_index(struct btrfs_root *root,
2562                             struct inode_backref *backref)
2563 {
2564         struct btrfs_trans_handle *trans;
2565         struct btrfs_dir_item *di;
2566         struct btrfs_path path;
2567         int ret = 0;
2568
2569         trans = btrfs_start_transaction(root, 1);
2570         if (IS_ERR(trans))
2571                 return PTR_ERR(trans);
2572
2573         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2574                 (unsigned long long)backref->dir,
2575                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2576                 (unsigned long long)root->objectid);
2577
2578         btrfs_init_path(&path);
2579         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2580                                     backref->name, backref->namelen,
2581                                     backref->index, -1);
2582         if (IS_ERR(di)) {
2583                 ret = PTR_ERR(di);
2584                 btrfs_release_path(&path);
2585                 btrfs_commit_transaction(trans, root);
2586                 if (ret == -ENOENT)
2587                         return 0;
2588                 return ret;
2589         }
2590
2591         if (!di)
2592                 ret = btrfs_del_item(trans, root, &path);
2593         else
2594                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2595         BUG_ON(ret);
2596         btrfs_release_path(&path);
2597         btrfs_commit_transaction(trans, root);
2598         return ret;
2599 }
2600
2601 static int create_inode_item(struct btrfs_root *root,
2602                              struct inode_record *rec,
2603                              int root_dir)
2604 {
2605         struct btrfs_trans_handle *trans;
2606         struct btrfs_inode_item inode_item;
2607         time_t now = time(NULL);
2608         int ret;
2609
2610         trans = btrfs_start_transaction(root, 1);
2611         if (IS_ERR(trans)) {
2612                 ret = PTR_ERR(trans);
2613                 return ret;
2614         }
2615
2616         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2617                 "be incomplete, please check permissions and content after "
2618                 "the fsck completes.\n", (unsigned long long)root->objectid,
2619                 (unsigned long long)rec->ino);
2620
2621         memset(&inode_item, 0, sizeof(inode_item));
2622         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2623         if (root_dir)
2624                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2625         else
2626                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2627         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2628         if (rec->found_dir_item) {
2629                 if (rec->found_file_extent)
2630                         fprintf(stderr, "root %llu inode %llu has both a dir "
2631                                 "item and extents, unsure if it is a dir or a "
2632                                 "regular file so setting it as a directory\n",
2633                                 (unsigned long long)root->objectid,
2634                                 (unsigned long long)rec->ino);
2635                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2636                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2637         } else if (!rec->found_dir_item) {
2638                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2639                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2640         }
2641         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2642         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2643         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2644         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2645         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2646         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2647         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2648         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2649
2650         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2651         BUG_ON(ret);
2652         btrfs_commit_transaction(trans, root);
2653         return 0;
2654 }
2655
2656 static int repair_inode_backrefs(struct btrfs_root *root,
2657                                  struct inode_record *rec,
2658                                  struct cache_tree *inode_cache,
2659                                  int delete)
2660 {
2661         struct inode_backref *tmp, *backref;
2662         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2663         int ret = 0;
2664         int repaired = 0;
2665
2666         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2667                 if (!delete && rec->ino == root_dirid) {
2668                         if (!rec->found_inode_item) {
2669                                 ret = create_inode_item(root, rec, 1);
2670                                 if (ret)
2671                                         break;
2672                                 repaired++;
2673                         }
2674                 }
2675
2676                 /* Index 0 for root dir's are special, don't mess with it */
2677                 if (rec->ino == root_dirid && backref->index == 0)
2678                         continue;
2679
2680                 if (delete &&
2681                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2682                      (backref->found_dir_index && backref->found_inode_ref &&
2683                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2684                         ret = delete_dir_index(root, backref);
2685                         if (ret)
2686                                 break;
2687                         repaired++;
2688                         list_del(&backref->list);
2689                         free(backref);
2690                         continue;
2691                 }
2692
2693                 if (!delete && !backref->found_dir_index &&
2694                     backref->found_dir_item && backref->found_inode_ref) {
2695                         ret = add_missing_dir_index(root, inode_cache, rec,
2696                                                     backref);
2697                         if (ret)
2698                                 break;
2699                         repaired++;
2700                         if (backref->found_dir_item &&
2701                             backref->found_dir_index) {
2702                                 if (!backref->errors &&
2703                                     backref->found_inode_ref) {
2704                                         list_del(&backref->list);
2705                                         free(backref);
2706                                         continue;
2707                                 }
2708                         }
2709                 }
2710
2711                 if (!delete && (!backref->found_dir_index &&
2712                                 !backref->found_dir_item &&
2713                                 backref->found_inode_ref)) {
2714                         struct btrfs_trans_handle *trans;
2715                         struct btrfs_key location;
2716
2717                         ret = check_dir_conflict(root, backref->name,
2718                                                  backref->namelen,
2719                                                  backref->dir,
2720                                                  backref->index);
2721                         if (ret) {
2722                                 /*
2723                                  * let nlink fixing routine to handle it,
2724                                  * which can do it better.
2725                                  */
2726                                 ret = 0;
2727                                 break;
2728                         }
2729                         location.objectid = rec->ino;
2730                         location.type = BTRFS_INODE_ITEM_KEY;
2731                         location.offset = 0;
2732
2733                         trans = btrfs_start_transaction(root, 1);
2734                         if (IS_ERR(trans)) {
2735                                 ret = PTR_ERR(trans);
2736                                 break;
2737                         }
2738                         fprintf(stderr, "adding missing dir index/item pair "
2739                                 "for inode %llu\n",
2740                                 (unsigned long long)rec->ino);
2741                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2742                                                     backref->namelen,
2743                                                     backref->dir, &location,
2744                                                     imode_to_type(rec->imode),
2745                                                     backref->index);
2746                         BUG_ON(ret);
2747                         btrfs_commit_transaction(trans, root);
2748                         repaired++;
2749                 }
2750
2751                 if (!delete && (backref->found_inode_ref &&
2752                                 backref->found_dir_index &&
2753                                 backref->found_dir_item &&
2754                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2755                                 !rec->found_inode_item)) {
2756                         ret = create_inode_item(root, rec, 0);
2757                         if (ret)
2758                                 break;
2759                         repaired++;
2760                 }
2761
2762         }
2763         return ret ? ret : repaired;
2764 }
2765
2766 /*
2767  * To determine the file type for nlink/inode_item repair
2768  *
2769  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2770  * Return -ENOENT if file type is not found.
2771  */
2772 static int find_file_type(struct inode_record *rec, u8 *type)
2773 {
2774         struct inode_backref *backref;
2775
2776         /* For inode item recovered case */
2777         if (rec->found_inode_item) {
2778                 *type = imode_to_type(rec->imode);
2779                 return 0;
2780         }
2781
2782         list_for_each_entry(backref, &rec->backrefs, list) {
2783                 if (backref->found_dir_index || backref->found_dir_item) {
2784                         *type = backref->filetype;
2785                         return 0;
2786                 }
2787         }
2788         return -ENOENT;
2789 }
2790
2791 /*
2792  * To determine the file name for nlink repair
2793  *
2794  * Return 0 if file name is found, set name and namelen.
2795  * Return -ENOENT if file name is not found.
2796  */
2797 static int find_file_name(struct inode_record *rec,
2798                           char *name, int *namelen)
2799 {
2800         struct inode_backref *backref;
2801
2802         list_for_each_entry(backref, &rec->backrefs, list) {
2803                 if (backref->found_dir_index || backref->found_dir_item ||
2804                     backref->found_inode_ref) {
2805                         memcpy(name, backref->name, backref->namelen);
2806                         *namelen = backref->namelen;
2807                         return 0;
2808                 }
2809         }
2810         return -ENOENT;
2811 }
2812
2813 /* Reset the nlink of the inode to the correct one */
2814 static int reset_nlink(struct btrfs_trans_handle *trans,
2815                        struct btrfs_root *root,
2816                        struct btrfs_path *path,
2817                        struct inode_record *rec)
2818 {
2819         struct inode_backref *backref;
2820         struct inode_backref *tmp;
2821         struct btrfs_key key;
2822         struct btrfs_inode_item *inode_item;
2823         int ret = 0;
2824
2825         /* We don't believe this either, reset it and iterate backref */
2826         rec->found_link = 0;
2827
2828         /* Remove all backref including the valid ones */
2829         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2830                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2831                                    backref->index, backref->name,
2832                                    backref->namelen, 0);
2833                 if (ret < 0)
2834                         goto out;
2835
2836                 /* remove invalid backref, so it won't be added back */
2837                 if (!(backref->found_dir_index &&
2838                       backref->found_dir_item &&
2839                       backref->found_inode_ref)) {
2840                         list_del(&backref->list);
2841                         free(backref);
2842                 } else {
2843                         rec->found_link++;
2844                 }
2845         }
2846
2847         /* Set nlink to 0 */
2848         key.objectid = rec->ino;
2849         key.type = BTRFS_INODE_ITEM_KEY;
2850         key.offset = 0;
2851         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2852         if (ret < 0)
2853                 goto out;
2854         if (ret > 0) {
2855                 ret = -ENOENT;
2856                 goto out;
2857         }
2858         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2859                                     struct btrfs_inode_item);
2860         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2861         btrfs_mark_buffer_dirty(path->nodes[0]);
2862         btrfs_release_path(path);
2863
2864         /*
2865          * Add back valid inode_ref/dir_item/dir_index,
2866          * add_link() will handle the nlink inc, so new nlink must be correct
2867          */
2868         list_for_each_entry(backref, &rec->backrefs, list) {
2869                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2870                                      backref->name, backref->namelen,
2871                                      backref->filetype, &backref->index, 1);
2872                 if (ret < 0)
2873                         goto out;
2874         }
2875 out:
2876         btrfs_release_path(path);
2877         return ret;
2878 }
2879
2880 static int get_highest_inode(struct btrfs_trans_handle *trans,
2881                                 struct btrfs_root *root,
2882                                 struct btrfs_path *path,
2883                                 u64 *highest_ino)
2884 {
2885         struct btrfs_key key, found_key;
2886         int ret;
2887
2888         btrfs_init_path(path);
2889         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2890         key.offset = -1;
2891         key.type = BTRFS_INODE_ITEM_KEY;
2892         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2893         if (ret == 1) {
2894                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2895                                 path->slots[0] - 1);
2896                 *highest_ino = found_key.objectid;
2897                 ret = 0;
2898         }
2899         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2900                 ret = -EOVERFLOW;
2901         btrfs_release_path(path);
2902         return ret;
2903 }
2904
2905 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2906                                struct btrfs_root *root,
2907                                struct btrfs_path *path,
2908                                struct inode_record *rec)
2909 {
2910         char *dir_name = "lost+found";
2911         char namebuf[BTRFS_NAME_LEN] = {0};
2912         u64 lost_found_ino;
2913         u32 mode = 0700;
2914         u8 type = 0;
2915         int namelen = 0;
2916         int name_recovered = 0;
2917         int type_recovered = 0;
2918         int ret = 0;
2919
2920         /*
2921          * Get file name and type first before these invalid inode ref
2922          * are deleted by remove_all_invalid_backref()
2923          */
2924         name_recovered = !find_file_name(rec, namebuf, &namelen);
2925         type_recovered = !find_file_type(rec, &type);
2926
2927         if (!name_recovered) {
2928                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2929                        rec->ino, rec->ino);
2930                 namelen = count_digits(rec->ino);
2931                 sprintf(namebuf, "%llu", rec->ino);
2932                 name_recovered = 1;
2933         }
2934         if (!type_recovered) {
2935                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2936                        rec->ino);
2937                 type = BTRFS_FT_REG_FILE;
2938                 type_recovered = 1;
2939         }
2940
2941         ret = reset_nlink(trans, root, path, rec);
2942         if (ret < 0) {
2943                 fprintf(stderr,
2944                         "Failed to reset nlink for inode %llu: %s\n",
2945                         rec->ino, strerror(-ret));
2946                 goto out;
2947         }
2948
2949         if (rec->found_link == 0) {
2950                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2951                 if (ret < 0)
2952                         goto out;
2953                 lost_found_ino++;
2954                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2955                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2956                                   mode);
2957                 if (ret < 0) {
2958                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2959                                 dir_name, strerror(-ret));
2960                         goto out;
2961                 }
2962                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2963                                      namebuf, namelen, type, NULL, 1);
2964                 /*
2965                  * Add ".INO" suffix several times to handle case where
2966                  * "FILENAME.INO" is already taken by another file.
2967                  */
2968                 while (ret == -EEXIST) {
2969                         /*
2970                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2971                          */
2972                         if (namelen + count_digits(rec->ino) + 1 >
2973                             BTRFS_NAME_LEN) {
2974                                 ret = -EFBIG;
2975                                 goto out;
2976                         }
2977                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2978                                  ".%llu", rec->ino);
2979                         namelen += count_digits(rec->ino) + 1;
2980                         ret = btrfs_add_link(trans, root, rec->ino,
2981                                              lost_found_ino, namebuf,
2982                                              namelen, type, NULL, 1);
2983                 }
2984                 if (ret < 0) {
2985                         fprintf(stderr,
2986                                 "Failed to link the inode %llu to %s dir: %s\n",
2987                                 rec->ino, dir_name, strerror(-ret));
2988                         goto out;
2989                 }
2990                 /*
2991                  * Just increase the found_link, don't actually add the
2992                  * backref. This will make things easier and this inode
2993                  * record will be freed after the repair is done.
2994                  * So fsck will not report problem about this inode.
2995                  */
2996                 rec->found_link++;
2997                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2998                        namelen, namebuf, dir_name);
2999         }
3000         printf("Fixed the nlink of inode %llu\n", rec->ino);
3001 out:
3002         /*
3003          * Clear the flag anyway, or we will loop forever for the same inode
3004          * as it will not be removed from the bad inode list and the dead loop
3005          * happens.
3006          */
3007         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3008         btrfs_release_path(path);
3009         return ret;
3010 }
3011
3012 /*
3013  * Check if there is any normal(reg or prealloc) file extent for given
3014  * ino.
3015  * This is used to determine the file type when neither its dir_index/item or
3016  * inode_item exists.
3017  *
3018  * This will *NOT* report error, if any error happens, just consider it does
3019  * not have any normal file extent.
3020  */
3021 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3022 {
3023         struct btrfs_path path;
3024         struct btrfs_key key;
3025         struct btrfs_key found_key;
3026         struct btrfs_file_extent_item *fi;
3027         u8 type;
3028         int ret = 0;
3029
3030         btrfs_init_path(&path);
3031         key.objectid = ino;
3032         key.type = BTRFS_EXTENT_DATA_KEY;
3033         key.offset = 0;
3034
3035         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3036         if (ret < 0) {
3037                 ret = 0;
3038                 goto out;
3039         }
3040         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3041                 ret = btrfs_next_leaf(root, &path);
3042                 if (ret) {
3043                         ret = 0;
3044                         goto out;
3045                 }
3046         }
3047         while (1) {
3048                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3049                                       path.slots[0]);
3050                 if (found_key.objectid != ino ||
3051                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3052                         break;
3053                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3054                                     struct btrfs_file_extent_item);
3055                 type = btrfs_file_extent_type(path.nodes[0], fi);
3056                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3057                         ret = 1;
3058                         goto out;
3059                 }
3060         }
3061 out:
3062         btrfs_release_path(&path);
3063         return ret;
3064 }
3065
3066 static u32 btrfs_type_to_imode(u8 type)
3067 {
3068         static u32 imode_by_btrfs_type[] = {
3069                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3070                 [BTRFS_FT_DIR]          = S_IFDIR,
3071                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3072                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3073                 [BTRFS_FT_FIFO]         = S_IFIFO,
3074                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3075                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3076         };
3077
3078         return imode_by_btrfs_type[(type)];
3079 }
3080
3081 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3082                                 struct btrfs_root *root,
3083                                 struct btrfs_path *path,
3084                                 struct inode_record *rec)
3085 {
3086         u8 filetype;
3087         u32 mode = 0700;
3088         int type_recovered = 0;
3089         int ret = 0;
3090
3091         printf("Trying to rebuild inode:%llu\n", rec->ino);
3092
3093         type_recovered = !find_file_type(rec, &filetype);
3094
3095         /*
3096          * Try to determine inode type if type not found.
3097          *
3098          * For found regular file extent, it must be FILE.
3099          * For found dir_item/index, it must be DIR.
3100          *
3101          * For undetermined one, use FILE as fallback.
3102          *
3103          * TODO:
3104          * 1. If found backref(inode_index/item is already handled) to it,
3105          *    it must be DIR.
3106          *    Need new inode-inode ref structure to allow search for that.
3107          */
3108         if (!type_recovered) {
3109                 if (rec->found_file_extent &&
3110                     find_normal_file_extent(root, rec->ino)) {
3111                         type_recovered = 1;
3112                         filetype = BTRFS_FT_REG_FILE;
3113                 } else if (rec->found_dir_item) {
3114                         type_recovered = 1;
3115                         filetype = BTRFS_FT_DIR;
3116                 } else if (!list_empty(&rec->orphan_extents)) {
3117                         type_recovered = 1;
3118                         filetype = BTRFS_FT_REG_FILE;
3119                 } else{
3120                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3121                                rec->ino);
3122                         type_recovered = 1;
3123                         filetype = BTRFS_FT_REG_FILE;
3124                 }
3125         }
3126
3127         ret = btrfs_new_inode(trans, root, rec->ino,
3128                               mode | btrfs_type_to_imode(filetype));
3129         if (ret < 0)
3130                 goto out;
3131
3132         /*
3133          * Here inode rebuild is done, we only rebuild the inode item,
3134          * don't repair the nlink(like move to lost+found).
3135          * That is the job of nlink repair.
3136          *
3137          * We just fill the record and return
3138          */
3139         rec->found_dir_item = 1;
3140         rec->imode = mode | btrfs_type_to_imode(filetype);
3141         rec->nlink = 0;
3142         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3143         /* Ensure the inode_nlinks repair function will be called */
3144         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3145 out:
3146         return ret;
3147 }
3148
3149 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3150                                       struct btrfs_root *root,
3151                                       struct btrfs_path *path,
3152                                       struct inode_record *rec)
3153 {
3154         struct orphan_data_extent *orphan;
3155         struct orphan_data_extent *tmp;
3156         int ret = 0;
3157
3158         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3159                 /*
3160                  * Check for conflicting file extents
3161                  *
3162                  * Here we don't know whether the extents is compressed or not,
3163                  * so we can only assume it not compressed nor data offset,
3164                  * and use its disk_len as extent length.
3165                  */
3166                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3167                                        orphan->offset, orphan->disk_len, 0);
3168                 btrfs_release_path(path);
3169                 if (ret < 0)
3170                         goto out;
3171                 if (!ret) {
3172                         fprintf(stderr,
3173                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3174                                 orphan->disk_bytenr, orphan->disk_len);
3175                         ret = btrfs_free_extent(trans,
3176                                         root->fs_info->extent_root,
3177                                         orphan->disk_bytenr, orphan->disk_len,
3178                                         0, root->objectid, orphan->objectid,
3179                                         orphan->offset);
3180                         if (ret < 0)
3181                                 goto out;
3182                 }
3183                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3184                                 orphan->offset, orphan->disk_bytenr,
3185                                 orphan->disk_len, orphan->disk_len);
3186                 if (ret < 0)
3187                         goto out;
3188
3189                 /* Update file size info */
3190                 rec->found_size += orphan->disk_len;
3191                 if (rec->found_size == rec->nbytes)
3192                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3193
3194                 /* Update the file extent hole info too */
3195                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3196                                            orphan->disk_len);
3197                 if (ret < 0)
3198                         goto out;
3199                 if (RB_EMPTY_ROOT(&rec->holes))
3200                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3201
3202                 list_del(&orphan->list);
3203                 free(orphan);
3204         }
3205         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3206 out:
3207         return ret;
3208 }
3209
3210 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3211                                         struct btrfs_root *root,
3212                                         struct btrfs_path *path,
3213                                         struct inode_record *rec)
3214 {
3215         struct rb_node *node;
3216         struct file_extent_hole *hole;
3217         int found = 0;
3218         int ret = 0;
3219
3220         node = rb_first(&rec->holes);
3221
3222         while (node) {
3223                 found = 1;
3224                 hole = rb_entry(node, struct file_extent_hole, node);
3225                 ret = btrfs_punch_hole(trans, root, rec->ino,
3226                                        hole->start, hole->len);
3227                 if (ret < 0)
3228                         goto out;
3229                 ret = del_file_extent_hole(&rec->holes, hole->start,
3230                                            hole->len);
3231                 if (ret < 0)
3232                         goto out;
3233                 if (RB_EMPTY_ROOT(&rec->holes))
3234                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3235                 node = rb_first(&rec->holes);
3236         }
3237         /* special case for a file losing all its file extent */
3238         if (!found) {
3239                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3240                                        round_up(rec->isize,
3241                                                 root->fs_info->sectorsize));
3242                 if (ret < 0)
3243                         goto out;
3244         }
3245         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3246                rec->ino, root->objectid);
3247 out:
3248         return ret;
3249 }
3250
3251 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3252 {
3253         struct btrfs_trans_handle *trans;
3254         struct btrfs_path path;
3255         int ret = 0;
3256
3257         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3258                              I_ERR_NO_ORPHAN_ITEM |
3259                              I_ERR_LINK_COUNT_WRONG |
3260                              I_ERR_NO_INODE_ITEM |
3261                              I_ERR_FILE_EXTENT_ORPHAN |
3262                              I_ERR_FILE_EXTENT_DISCOUNT|
3263                              I_ERR_FILE_NBYTES_WRONG)))
3264                 return rec->errors;
3265
3266         /*
3267          * For nlink repair, it may create a dir and add link, so
3268          * 2 for parent(256)'s dir_index and dir_item
3269          * 2 for lost+found dir's inode_item and inode_ref
3270          * 1 for the new inode_ref of the file
3271          * 2 for lost+found dir's dir_index and dir_item for the file
3272          */
3273         trans = btrfs_start_transaction(root, 7);
3274         if (IS_ERR(trans))
3275                 return PTR_ERR(trans);
3276
3277         btrfs_init_path(&path);
3278         if (rec->errors & I_ERR_NO_INODE_ITEM)
3279                 ret = repair_inode_no_item(trans, root, &path, rec);
3280         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3281                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3282         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3283                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3284         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3285                 ret = repair_inode_isize(trans, root, &path, rec);
3286         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3287                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3288         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3289                 ret = repair_inode_nlinks(trans, root, &path, rec);
3290         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3291                 ret = repair_inode_nbytes(trans, root, &path, rec);
3292         btrfs_commit_transaction(trans, root);
3293         btrfs_release_path(&path);
3294         return ret;
3295 }
3296
3297 static int check_inode_recs(struct btrfs_root *root,
3298                             struct cache_tree *inode_cache)
3299 {
3300         struct cache_extent *cache;
3301         struct ptr_node *node;
3302         struct inode_record *rec;
3303         struct inode_backref *backref;
3304         int stage = 0;
3305         int ret = 0;
3306         int err = 0;
3307         u64 error = 0;
3308         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3309
3310         if (btrfs_root_refs(&root->root_item) == 0) {
3311                 if (!cache_tree_empty(inode_cache))
3312                         fprintf(stderr, "warning line %d\n", __LINE__);
3313                 return 0;
3314         }
3315
3316         /*
3317          * We need to repair backrefs first because we could change some of the
3318          * errors in the inode recs.
3319          *
3320          * We also need to go through and delete invalid backrefs first and then
3321          * add the correct ones second.  We do this because we may get EEXIST
3322          * when adding back the correct index because we hadn't yet deleted the
3323          * invalid index.
3324          *
3325          * For example, if we were missing a dir index then the directories
3326          * isize would be wrong, so if we fixed the isize to what we thought it
3327          * would be and then fixed the backref we'd still have a invalid fs, so
3328          * we need to add back the dir index and then check to see if the isize
3329          * is still wrong.
3330          */
3331         while (stage < 3) {
3332                 stage++;
3333                 if (stage == 3 && !err)
3334                         break;
3335
3336                 cache = search_cache_extent(inode_cache, 0);
3337                 while (repair && cache) {
3338                         node = container_of(cache, struct ptr_node, cache);
3339                         rec = node->data;
3340                         cache = next_cache_extent(cache);
3341
3342                         /* Need to free everything up and rescan */
3343                         if (stage == 3) {
3344                                 remove_cache_extent(inode_cache, &node->cache);
3345                                 free(node);
3346                                 free_inode_rec(rec);
3347                                 continue;
3348                         }
3349
3350                         if (list_empty(&rec->backrefs))
3351                                 continue;
3352
3353                         ret = repair_inode_backrefs(root, rec, inode_cache,
3354                                                     stage == 1);
3355                         if (ret < 0) {
3356                                 err = ret;
3357                                 stage = 2;
3358                                 break;
3359                         } if (ret > 0) {
3360                                 err = -EAGAIN;
3361                         }
3362                 }
3363         }
3364         if (err)
3365                 return err;
3366
3367         rec = get_inode_rec(inode_cache, root_dirid, 0);
3368         BUG_ON(IS_ERR(rec));
3369         if (rec) {
3370                 ret = check_root_dir(rec);
3371                 if (ret) {
3372                         fprintf(stderr, "root %llu root dir %llu error\n",
3373                                 (unsigned long long)root->root_key.objectid,
3374                                 (unsigned long long)root_dirid);
3375                         print_inode_error(root, rec);
3376                         error++;
3377                 }
3378         } else {
3379                 if (repair) {
3380                         struct btrfs_trans_handle *trans;
3381
3382                         trans = btrfs_start_transaction(root, 1);
3383                         if (IS_ERR(trans)) {
3384                                 err = PTR_ERR(trans);
3385                                 return err;
3386                         }
3387
3388                         fprintf(stderr,
3389                                 "root %llu missing its root dir, recreating\n",
3390                                 (unsigned long long)root->objectid);
3391
3392                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3393                         BUG_ON(ret);
3394
3395                         btrfs_commit_transaction(trans, root);
3396                         return -EAGAIN;
3397                 }
3398
3399                 fprintf(stderr, "root %llu root dir %llu not found\n",
3400                         (unsigned long long)root->root_key.objectid,
3401                         (unsigned long long)root_dirid);
3402         }
3403
3404         while (1) {
3405                 cache = search_cache_extent(inode_cache, 0);
3406                 if (!cache)
3407                         break;
3408                 node = container_of(cache, struct ptr_node, cache);
3409                 rec = node->data;
3410                 remove_cache_extent(inode_cache, &node->cache);
3411                 free(node);
3412                 if (rec->ino == root_dirid ||
3413                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3414                         free_inode_rec(rec);
3415                         continue;
3416                 }
3417
3418                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3419                         ret = check_orphan_item(root, rec->ino);
3420                         if (ret == 0)
3421                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3422                         if (can_free_inode_rec(rec)) {
3423                                 free_inode_rec(rec);
3424                                 continue;
3425                         }
3426                 }
3427
3428                 if (!rec->found_inode_item)
3429                         rec->errors |= I_ERR_NO_INODE_ITEM;
3430                 if (rec->found_link != rec->nlink)
3431                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3432                 if (repair) {
3433                         ret = try_repair_inode(root, rec);
3434                         if (ret == 0 && can_free_inode_rec(rec)) {
3435                                 free_inode_rec(rec);
3436                                 continue;
3437                         }
3438                         ret = 0;
3439                 }
3440
3441                 if (!(repair && ret == 0))
3442                         error++;
3443                 print_inode_error(root, rec);
3444                 list_for_each_entry(backref, &rec->backrefs, list) {
3445                         if (!backref->found_dir_item)
3446                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3447                         if (!backref->found_dir_index)
3448                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3449                         if (!backref->found_inode_ref)
3450                                 backref->errors |= REF_ERR_NO_INODE_REF;
3451                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3452                                 " namelen %u name %s filetype %d errors %x",
3453                                 (unsigned long long)backref->dir,
3454                                 (unsigned long long)backref->index,
3455                                 backref->namelen, backref->name,
3456                                 backref->filetype, backref->errors);
3457                         print_ref_error(backref->errors);
3458                 }
3459                 free_inode_rec(rec);
3460         }
3461         return (error > 0) ? -1 : 0;
3462 }
3463
3464 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3465                                         u64 objectid)
3466 {
3467         struct cache_extent *cache;
3468         struct root_record *rec = NULL;
3469         int ret;
3470
3471         cache = lookup_cache_extent(root_cache, objectid, 1);
3472         if (cache) {
3473                 rec = container_of(cache, struct root_record, cache);
3474         } else {
3475                 rec = calloc(1, sizeof(*rec));
3476                 if (!rec)
3477                         return ERR_PTR(-ENOMEM);
3478                 rec->objectid = objectid;
3479                 INIT_LIST_HEAD(&rec->backrefs);
3480                 rec->cache.start = objectid;
3481                 rec->cache.size = 1;
3482
3483                 ret = insert_cache_extent(root_cache, &rec->cache);
3484                 if (ret)
3485                         return ERR_PTR(-EEXIST);
3486         }
3487         return rec;
3488 }
3489
3490 static struct root_backref *get_root_backref(struct root_record *rec,
3491                                              u64 ref_root, u64 dir, u64 index,
3492                                              const char *name, int namelen)
3493 {
3494         struct root_backref *backref;
3495
3496         list_for_each_entry(backref, &rec->backrefs, list) {
3497                 if (backref->ref_root != ref_root || backref->dir != dir ||
3498                     backref->namelen != namelen)
3499                         continue;
3500                 if (memcmp(name, backref->name, namelen))
3501                         continue;
3502                 return backref;
3503         }
3504
3505         backref = calloc(1, sizeof(*backref) + namelen + 1);
3506         if (!backref)
3507                 return NULL;
3508         backref->ref_root = ref_root;
3509         backref->dir = dir;
3510         backref->index = index;
3511         backref->namelen = namelen;
3512         memcpy(backref->name, name, namelen);
3513         backref->name[namelen] = '\0';
3514         list_add_tail(&backref->list, &rec->backrefs);
3515         return backref;
3516 }
3517
3518 static void free_root_record(struct cache_extent *cache)
3519 {
3520         struct root_record *rec;
3521         struct root_backref *backref;
3522
3523         rec = container_of(cache, struct root_record, cache);
3524         while (!list_empty(&rec->backrefs)) {
3525                 backref = to_root_backref(rec->backrefs.next);
3526                 list_del(&backref->list);
3527                 free(backref);
3528         }
3529
3530         free(rec);
3531 }
3532
3533 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3534
3535 static int add_root_backref(struct cache_tree *root_cache,
3536                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3537                             const char *name, int namelen,
3538                             int item_type, int errors)
3539 {
3540         struct root_record *rec;
3541         struct root_backref *backref;
3542
3543         rec = get_root_rec(root_cache, root_id);
3544         BUG_ON(IS_ERR(rec));
3545         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3546         BUG_ON(!backref);
3547
3548         backref->errors |= errors;
3549
3550         if (item_type != BTRFS_DIR_ITEM_KEY) {
3551                 if (backref->found_dir_index || backref->found_back_ref ||
3552                     backref->found_forward_ref) {
3553                         if (backref->index != index)
3554                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3555                 } else {
3556                         backref->index = index;
3557                 }
3558         }
3559
3560         if (item_type == BTRFS_DIR_ITEM_KEY) {
3561                 if (backref->found_forward_ref)
3562                         rec->found_ref++;
3563                 backref->found_dir_item = 1;
3564         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3565                 backref->found_dir_index = 1;
3566         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3567                 if (backref->found_forward_ref)
3568                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3569                 else if (backref->found_dir_item)
3570                         rec->found_ref++;
3571                 backref->found_forward_ref = 1;
3572         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3573                 if (backref->found_back_ref)
3574                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3575                 backref->found_back_ref = 1;
3576         } else {
3577                 BUG_ON(1);
3578         }
3579
3580         if (backref->found_forward_ref && backref->found_dir_item)
3581                 backref->reachable = 1;
3582         return 0;
3583 }
3584
3585 static int merge_root_recs(struct btrfs_root *root,
3586                            struct cache_tree *src_cache,
3587                            struct cache_tree *dst_cache)
3588 {
3589         struct cache_extent *cache;
3590         struct ptr_node *node;
3591         struct inode_record *rec;
3592         struct inode_backref *backref;
3593         int ret = 0;
3594
3595         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3596                 free_inode_recs_tree(src_cache);
3597                 return 0;
3598         }
3599
3600         while (1) {
3601                 cache = search_cache_extent(src_cache, 0);
3602                 if (!cache)
3603                         break;
3604                 node = container_of(cache, struct ptr_node, cache);
3605                 rec = node->data;
3606                 remove_cache_extent(src_cache, &node->cache);
3607                 free(node);
3608
3609                 ret = is_child_root(root, root->objectid, rec->ino);
3610                 if (ret < 0)
3611                         break;
3612                 else if (ret == 0)
3613                         goto skip;
3614
3615                 list_for_each_entry(backref, &rec->backrefs, list) {
3616                         BUG_ON(backref->found_inode_ref);
3617                         if (backref->found_dir_item)
3618                                 add_root_backref(dst_cache, rec->ino,
3619                                         root->root_key.objectid, backref->dir,
3620                                         backref->index, backref->name,
3621                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3622                                         backref->errors);
3623                         if (backref->found_dir_index)
3624                                 add_root_backref(dst_cache, rec->ino,
3625                                         root->root_key.objectid, backref->dir,
3626                                         backref->index, backref->name,
3627                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3628                                         backref->errors);
3629                 }
3630 skip:
3631                 free_inode_rec(rec);
3632         }
3633         if (ret < 0)
3634                 return ret;
3635         return 0;
3636 }
3637
3638 static int check_root_refs(struct btrfs_root *root,
3639                            struct cache_tree *root_cache)
3640 {
3641         struct root_record *rec;
3642         struct root_record *ref_root;
3643         struct root_backref *backref;
3644         struct cache_extent *cache;
3645         int loop = 1;
3646         int ret;
3647         int error;
3648         int errors = 0;
3649
3650         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3651         BUG_ON(IS_ERR(rec));
3652         rec->found_ref = 1;
3653
3654         /* fixme: this can not detect circular references */
3655         while (loop) {
3656                 loop = 0;
3657                 cache = search_cache_extent(root_cache, 0);
3658                 while (1) {
3659                         if (!cache)
3660                                 break;
3661                         rec = container_of(cache, struct root_record, cache);
3662                         cache = next_cache_extent(cache);
3663
3664                         if (rec->found_ref == 0)
3665                                 continue;
3666
3667                         list_for_each_entry(backref, &rec->backrefs, list) {
3668                                 if (!backref->reachable)
3669                                         continue;
3670
3671                                 ref_root = get_root_rec(root_cache,
3672                                                         backref->ref_root);
3673                                 BUG_ON(IS_ERR(ref_root));
3674                                 if (ref_root->found_ref > 0)
3675                                         continue;
3676
3677                                 backref->reachable = 0;
3678                                 rec->found_ref--;
3679                                 if (rec->found_ref == 0)
3680                                         loop = 1;
3681                         }
3682                 }
3683         }
3684
3685         cache = search_cache_extent(root_cache, 0);
3686         while (1) {
3687                 if (!cache)
3688                         break;
3689                 rec = container_of(cache, struct root_record, cache);
3690                 cache = next_cache_extent(cache);
3691
3692                 if (rec->found_ref == 0 &&
3693                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3694                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3695                         ret = check_orphan_item(root->fs_info->tree_root,
3696                                                 rec->objectid);
3697                         if (ret == 0)
3698                                 continue;
3699
3700                         /*
3701                          * If we don't have a root item then we likely just have
3702                          * a dir item in a snapshot for this root but no actual
3703                          * ref key or anything so it's meaningless.
3704                          */
3705                         if (!rec->found_root_item)
3706                                 continue;
3707                         errors++;
3708                         fprintf(stderr, "fs tree %llu not referenced\n",
3709                                 (unsigned long long)rec->objectid);
3710                 }
3711
3712                 error = 0;
3713                 if (rec->found_ref > 0 && !rec->found_root_item)
3714                         error = 1;
3715                 list_for_each_entry(backref, &rec->backrefs, list) {
3716                         if (!backref->found_dir_item)
3717                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3718                         if (!backref->found_dir_index)
3719                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3720                         if (!backref->found_back_ref)
3721                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3722                         if (!backref->found_forward_ref)
3723                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3724                         if (backref->reachable && backref->errors)
3725                                 error = 1;
3726                 }
3727                 if (!error)
3728                         continue;
3729
3730                 errors++;
3731                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3732                         (unsigned long long)rec->objectid, rec->found_ref,
3733                          rec->found_root_item ? "" : "not found");
3734
3735                 list_for_each_entry(backref, &rec->backrefs, list) {
3736                         if (!backref->reachable)
3737                                 continue;
3738                         if (!backref->errors && rec->found_root_item)
3739                                 continue;
3740                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3741                                 " index %llu namelen %u name %s errors %x\n",
3742                                 (unsigned long long)backref->ref_root,
3743                                 (unsigned long long)backref->dir,
3744                                 (unsigned long long)backref->index,
3745                                 backref->namelen, backref->name,
3746                                 backref->errors);
3747                         print_ref_error(backref->errors);
3748                 }
3749         }
3750         return errors > 0 ? 1 : 0;
3751 }
3752
3753 static int process_root_ref(struct extent_buffer *eb, int slot,
3754                             struct btrfs_key *key,
3755                             struct cache_tree *root_cache)
3756 {
3757         u64 dirid;
3758         u64 index;
3759         u32 len;
3760         u32 name_len;
3761         struct btrfs_root_ref *ref;
3762         char namebuf[BTRFS_NAME_LEN];
3763         int error;
3764
3765         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3766
3767         dirid = btrfs_root_ref_dirid(eb, ref);
3768         index = btrfs_root_ref_sequence(eb, ref);
3769         name_len = btrfs_root_ref_name_len(eb, ref);
3770
3771         if (name_len <= BTRFS_NAME_LEN) {
3772                 len = name_len;
3773                 error = 0;
3774         } else {
3775                 len = BTRFS_NAME_LEN;
3776                 error = REF_ERR_NAME_TOO_LONG;
3777         }
3778         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3779
3780         if (key->type == BTRFS_ROOT_REF_KEY) {
3781                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3782                                  index, namebuf, len, key->type, error);
3783         } else {
3784                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3785                                  index, namebuf, len, key->type, error);
3786         }
3787         return 0;
3788 }
3789
3790 static void free_corrupt_block(struct cache_extent *cache)
3791 {
3792         struct btrfs_corrupt_block *corrupt;
3793
3794         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3795         free(corrupt);
3796 }
3797
3798 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3799
3800 /*
3801  * Repair the btree of the given root.
3802  *
3803  * The fix is to remove the node key in corrupt_blocks cache_tree.
3804  * and rebalance the tree.
3805  * After the fix, the btree should be writeable.
3806  */
3807 static int repair_btree(struct btrfs_root *root,
3808                         struct cache_tree *corrupt_blocks)
3809 {
3810         struct btrfs_trans_handle *trans;
3811         struct btrfs_path path;
3812         struct btrfs_corrupt_block *corrupt;
3813         struct cache_extent *cache;
3814         struct btrfs_key key;
3815         u64 offset;
3816         int level;
3817         int ret = 0;
3818
3819         if (cache_tree_empty(corrupt_blocks))
3820                 return 0;
3821
3822         trans = btrfs_start_transaction(root, 1);
3823         if (IS_ERR(trans)) {
3824                 ret = PTR_ERR(trans);
3825                 fprintf(stderr, "Error starting transaction: %s\n",
3826                         strerror(-ret));
3827                 return ret;
3828         }
3829         btrfs_init_path(&path);
3830         cache = first_cache_extent(corrupt_blocks);
3831         while (cache) {
3832                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3833                                        cache);
3834                 level = corrupt->level;
3835                 path.lowest_level = level;
3836                 key.objectid = corrupt->key.objectid;
3837                 key.type = corrupt->key.type;
3838                 key.offset = corrupt->key.offset;
3839
3840                 /*
3841                  * Here we don't want to do any tree balance, since it may
3842                  * cause a balance with corrupted brother leaf/node,
3843                  * so ins_len set to 0 here.
3844                  * Balance will be done after all corrupt node/leaf is deleted.
3845                  */
3846                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3847                 if (ret < 0)
3848                         goto out;
3849                 offset = btrfs_node_blockptr(path.nodes[level],
3850                                              path.slots[level]);
3851
3852                 /* Remove the ptr */
3853                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3854                 if (ret < 0)
3855                         goto out;
3856                 /*
3857                  * Remove the corresponding extent
3858                  * return value is not concerned.
3859                  */
3860                 btrfs_release_path(&path);
3861                 ret = btrfs_free_extent(trans, root, offset,
3862                                 root->fs_info->nodesize, 0,
3863                                 root->root_key.objectid, level - 1, 0);
3864                 cache = next_cache_extent(cache);
3865         }
3866
3867         /* Balance the btree using btrfs_search_slot() */
3868         cache = first_cache_extent(corrupt_blocks);
3869         while (cache) {
3870                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3871                                        cache);
3872                 memcpy(&key, &corrupt->key, sizeof(key));
3873                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3874                 if (ret < 0)
3875                         goto out;
3876                 /* return will always >0 since it won't find the item */
3877                 ret = 0;
3878                 btrfs_release_path(&path);
3879                 cache = next_cache_extent(cache);
3880         }
3881 out:
3882         btrfs_commit_transaction(trans, root);
3883         btrfs_release_path(&path);
3884         return ret;
3885 }
3886
3887 static int check_fs_root(struct btrfs_root *root,
3888                          struct cache_tree *root_cache,
3889                          struct walk_control *wc)
3890 {
3891         int ret = 0;
3892         int err = 0;
3893         int wret;
3894         int level;
3895         struct btrfs_path path;
3896         struct shared_node root_node;
3897         struct root_record *rec;
3898         struct btrfs_root_item *root_item = &root->root_item;
3899         struct cache_tree corrupt_blocks;
3900         struct orphan_data_extent *orphan;
3901         struct orphan_data_extent *tmp;
3902         enum btrfs_tree_block_status status;
3903         struct node_refs nrefs;
3904
3905         /*
3906          * Reuse the corrupt_block cache tree to record corrupted tree block
3907          *
3908          * Unlike the usage in extent tree check, here we do it in a per
3909          * fs/subvol tree base.
3910          */
3911         cache_tree_init(&corrupt_blocks);
3912         root->fs_info->corrupt_blocks = &corrupt_blocks;
3913
3914         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3915                 rec = get_root_rec(root_cache, root->root_key.objectid);
3916                 BUG_ON(IS_ERR(rec));
3917                 if (btrfs_root_refs(root_item) > 0)
3918                         rec->found_root_item = 1;
3919         }
3920
3921         btrfs_init_path(&path);
3922         memset(&root_node, 0, sizeof(root_node));
3923         cache_tree_init(&root_node.root_cache);
3924         cache_tree_init(&root_node.inode_cache);
3925         memset(&nrefs, 0, sizeof(nrefs));
3926
3927         /* Move the orphan extent record to corresponding inode_record */
3928         list_for_each_entry_safe(orphan, tmp,
3929                                  &root->orphan_data_extents, list) {
3930                 struct inode_record *inode;
3931
3932                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3933                                       1);
3934                 BUG_ON(IS_ERR(inode));
3935                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3936                 list_move(&orphan->list, &inode->orphan_extents);
3937         }
3938
3939         level = btrfs_header_level(root->node);
3940         memset(wc->nodes, 0, sizeof(wc->nodes));
3941         wc->nodes[level] = &root_node;
3942         wc->active_node = level;
3943         wc->root_level = level;
3944
3945         /* We may not have checked the root block, lets do that now */
3946         if (btrfs_is_leaf(root->node))
3947                 status = btrfs_check_leaf(root, NULL, root->node);
3948         else
3949                 status = btrfs_check_node(root, NULL, root->node);
3950         if (status != BTRFS_TREE_BLOCK_CLEAN)
3951                 return -EIO;
3952
3953         if (btrfs_root_refs(root_item) > 0 ||
3954             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3955                 path.nodes[level] = root->node;
3956                 extent_buffer_get(root->node);
3957                 path.slots[level] = 0;
3958         } else {
3959                 struct btrfs_key key;
3960                 struct btrfs_disk_key found_key;
3961
3962                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3963                 level = root_item->drop_level;
3964                 path.lowest_level = level;
3965                 if (level > btrfs_header_level(root->node) ||
3966                     level >= BTRFS_MAX_LEVEL) {
3967                         error("ignoring invalid drop level: %u", level);
3968                         goto skip_walking;
3969                 }
3970                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3971                 if (wret < 0)
3972                         goto skip_walking;
3973                 btrfs_node_key(path.nodes[level], &found_key,
3974                                 path.slots[level]);
3975                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3976                                         sizeof(found_key)));
3977         }
3978
3979         while (1) {
3980                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3981                 if (wret < 0)
3982                         ret = wret;
3983                 if (wret != 0)
3984                         break;
3985
3986                 wret = walk_up_tree(root, &path, wc, &level);
3987                 if (wret < 0)
3988                         ret = wret;
3989                 if (wret != 0)
3990                         break;
3991         }
3992 skip_walking:
3993         btrfs_release_path(&path);
3994
3995         if (!cache_tree_empty(&corrupt_blocks)) {
3996                 struct cache_extent *cache;
3997                 struct btrfs_corrupt_block *corrupt;
3998
3999                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4000                        root->root_key.objectid);
4001                 cache = first_cache_extent(&corrupt_blocks);
4002                 while (cache) {
4003                         corrupt = container_of(cache,
4004                                                struct btrfs_corrupt_block,
4005                                                cache);
4006                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4007                                cache->start, corrupt->level,
4008                                corrupt->key.objectid, corrupt->key.type,
4009                                corrupt->key.offset);
4010                         cache = next_cache_extent(cache);
4011                 }
4012                 if (repair) {
4013                         printf("Try to repair the btree for root %llu\n",
4014                                root->root_key.objectid);
4015                         ret = repair_btree(root, &corrupt_blocks);
4016                         if (ret < 0)
4017                                 fprintf(stderr, "Failed to repair btree: %s\n",
4018                                         strerror(-ret));
4019                         if (!ret)
4020                                 printf("Btree for root %llu is fixed\n",
4021                                        root->root_key.objectid);
4022                 }
4023         }
4024
4025         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4026         if (err < 0)
4027                 ret = err;
4028
4029         if (root_node.current) {
4030                 root_node.current->checked = 1;
4031                 maybe_free_inode_rec(&root_node.inode_cache,
4032                                 root_node.current);
4033         }
4034
4035         err = check_inode_recs(root, &root_node.inode_cache);
4036         if (!ret)
4037                 ret = err;
4038
4039         free_corrupt_blocks_tree(&corrupt_blocks);
4040         root->fs_info->corrupt_blocks = NULL;
4041         free_orphan_data_extents(&root->orphan_data_extents);
4042         return ret;
4043 }
4044
4045 static int fs_root_objectid(u64 objectid)
4046 {
4047         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4048             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4049                 return 1;
4050         return is_fstree(objectid);
4051 }
4052
4053 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4054                           struct cache_tree *root_cache)
4055 {
4056         struct btrfs_path path;
4057         struct btrfs_key key;
4058         struct walk_control wc;
4059         struct extent_buffer *leaf, *tree_node;
4060         struct btrfs_root *tmp_root;
4061         struct btrfs_root *tree_root = fs_info->tree_root;
4062         int ret;
4063         int err = 0;
4064
4065         if (ctx.progress_enabled) {
4066                 ctx.tp = TASK_FS_ROOTS;
4067                 task_start(ctx.info);
4068         }
4069
4070         /*
4071          * Just in case we made any changes to the extent tree that weren't
4072          * reflected into the free space cache yet.
4073          */
4074         if (repair)
4075                 reset_cached_block_groups(fs_info);
4076         memset(&wc, 0, sizeof(wc));
4077         cache_tree_init(&wc.shared);
4078         btrfs_init_path(&path);
4079
4080 again:
4081         key.offset = 0;
4082         key.objectid = 0;
4083         key.type = BTRFS_ROOT_ITEM_KEY;
4084         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4085         if (ret < 0) {
4086                 err = 1;
4087                 goto out;
4088         }
4089         tree_node = tree_root->node;
4090         while (1) {
4091                 if (tree_node != tree_root->node) {
4092                         free_root_recs_tree(root_cache);
4093                         btrfs_release_path(&path);
4094                         goto again;
4095                 }
4096                 leaf = path.nodes[0];
4097                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4098                         ret = btrfs_next_leaf(tree_root, &path);
4099                         if (ret) {
4100                                 if (ret < 0)
4101                                         err = 1;
4102                                 break;
4103                         }
4104                         leaf = path.nodes[0];
4105                 }
4106                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4107                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4108                     fs_root_objectid(key.objectid)) {
4109                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4110                                 tmp_root = btrfs_read_fs_root_no_cache(
4111                                                 fs_info, &key);
4112                         } else {
4113                                 key.offset = (u64)-1;
4114                                 tmp_root = btrfs_read_fs_root(
4115                                                 fs_info, &key);
4116                         }
4117                         if (IS_ERR(tmp_root)) {
4118                                 err = 1;
4119                                 goto next;
4120                         }
4121                         ret = check_fs_root(tmp_root, root_cache, &wc);
4122                         if (ret == -EAGAIN) {
4123                                 free_root_recs_tree(root_cache);
4124                                 btrfs_release_path(&path);
4125                                 goto again;
4126                         }
4127                         if (ret)
4128                                 err = 1;
4129                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4130                                 btrfs_free_fs_root(tmp_root);
4131                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4132                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4133                         process_root_ref(leaf, path.slots[0], &key,
4134                                          root_cache);
4135                 }
4136 next:
4137                 path.slots[0]++;
4138         }
4139 out:
4140         btrfs_release_path(&path);
4141         if (err)
4142                 free_extent_cache_tree(&wc.shared);
4143         if (!cache_tree_empty(&wc.shared))
4144                 fprintf(stderr, "warning line %d\n", __LINE__);
4145
4146         task_stop(ctx.info);
4147
4148         return err;
4149 }
4150
4151 /*
4152  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4153  * INODE_REF/INODE_EXTREF match.
4154  *
4155  * @root:       the root of the fs/file tree
4156  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4157  * @key:        the key of the DIR_ITEM/DIR_INDEX
4158  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4159  *              distinguish root_dir between normal dir/file
4160  * @name:       the name in the INODE_REF/INODE_EXTREF
4161  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4162  * @mode:       the st_mode of INODE_ITEM
4163  *
4164  * Return 0 if no error occurred.
4165  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4166  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4167  * dir/file.
4168  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4169  * not match for normal dir/file.
4170  */
4171 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4172                          struct btrfs_key *key, u64 index, char *name,
4173                          u32 namelen, u32 mode)
4174 {
4175         struct btrfs_path path;
4176         struct extent_buffer *node;
4177         struct btrfs_dir_item *di;
4178         struct btrfs_key location;
4179         char namebuf[BTRFS_NAME_LEN] = {0};
4180         u32 total;
4181         u32 cur = 0;
4182         u32 len;
4183         u32 name_len;
4184         u32 data_len;
4185         u8 filetype;
4186         int slot;
4187         int ret;
4188
4189         btrfs_init_path(&path);
4190         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4191         if (ret < 0) {
4192                 ret = DIR_ITEM_MISSING;
4193                 goto out;
4194         }
4195
4196         /* Process root dir and goto out*/
4197         if (index == 0) {
4198                 if (ret == 0) {
4199                         ret = ROOT_DIR_ERROR;
4200                         error(
4201                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4202                                 root->objectid,
4203                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4204                                         "REF" : "EXTREF",
4205                                 ref_key->objectid, ref_key->offset,
4206                                 key->type == BTRFS_DIR_ITEM_KEY ?
4207                                         "DIR_ITEM" : "DIR_INDEX");
4208                 } else {
4209                         ret = 0;
4210                 }
4211
4212                 goto out;
4213         }
4214
4215         /* Process normal file/dir */
4216         if (ret > 0) {
4217                 ret = DIR_ITEM_MISSING;
4218                 error(
4219                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4220                         root->objectid,
4221                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4222                         ref_key->objectid, ref_key->offset,
4223                         key->type == BTRFS_DIR_ITEM_KEY ?
4224                                 "DIR_ITEM" : "DIR_INDEX",
4225                         key->objectid, key->offset, namelen, name,
4226                         imode_to_type(mode));
4227                 goto out;
4228         }
4229
4230         /* Check whether inode_id/filetype/name match */
4231         node = path.nodes[0];
4232         slot = path.slots[0];
4233         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4234         total = btrfs_item_size_nr(node, slot);
4235         while (cur < total) {
4236                 ret = DIR_ITEM_MISMATCH;
4237                 name_len = btrfs_dir_name_len(node, di);
4238                 data_len = btrfs_dir_data_len(node, di);
4239
4240                 btrfs_dir_item_key_to_cpu(node, di, &location);
4241                 if (location.objectid != ref_key->objectid ||
4242                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4243                     location.offset != 0)
4244                         goto next;
4245
4246                 filetype = btrfs_dir_type(node, di);
4247                 if (imode_to_type(mode) != filetype)
4248                         goto next;
4249
4250                 if (cur + sizeof(*di) + name_len > total ||
4251                     name_len > BTRFS_NAME_LEN) {
4252                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4253                                 root->objectid,
4254                                 key->type == BTRFS_DIR_ITEM_KEY ?
4255                                 "DIR_ITEM" : "DIR_INDEX",
4256                                 key->objectid, key->offset, name_len);
4257
4258                         if (cur + sizeof(*di) > total)
4259                                 break;
4260                         len = min_t(u32, total - cur - sizeof(*di),
4261                                     BTRFS_NAME_LEN);
4262                 } else {
4263                         len = name_len;
4264                 }
4265
4266                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4267                 if (len != namelen || strncmp(namebuf, name, len))
4268                         goto next;
4269
4270                 ret = 0;
4271                 goto out;
4272 next:
4273                 len = sizeof(*di) + name_len + data_len;
4274                 di = (struct btrfs_dir_item *)((char *)di + len);
4275                 cur += len;
4276         }
4277         if (ret == DIR_ITEM_MISMATCH)
4278                 error(
4279                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4280                         root->objectid,
4281                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4282                         ref_key->objectid, ref_key->offset,
4283                         key->type == BTRFS_DIR_ITEM_KEY ?
4284                                 "DIR_ITEM" : "DIR_INDEX",
4285                         key->objectid, key->offset, namelen, name,
4286                         imode_to_type(mode));
4287 out:
4288         btrfs_release_path(&path);
4289         return ret;
4290 }
4291
4292 /*
4293  * Traverse the given INODE_REF and call find_dir_item() to find related
4294  * DIR_ITEM/DIR_INDEX.
4295  *
4296  * @root:       the root of the fs/file tree
4297  * @ref_key:    the key of the INODE_REF
4298  * @refs:       the count of INODE_REF
4299  * @mode:       the st_mode of INODE_ITEM
4300  *
4301  * Return 0 if no error occurred.
4302  */
4303 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4304                            struct extent_buffer *node, int slot, u64 *refs,
4305                            int mode)
4306 {
4307         struct btrfs_key key;
4308         struct btrfs_inode_ref *ref;
4309         char namebuf[BTRFS_NAME_LEN] = {0};
4310         u32 total;
4311         u32 cur = 0;
4312         u32 len;
4313         u32 name_len;
4314         u64 index;
4315         int ret, err = 0;
4316
4317         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4318         total = btrfs_item_size_nr(node, slot);
4319
4320 next:
4321         /* Update inode ref count */
4322         (*refs)++;
4323
4324         index = btrfs_inode_ref_index(node, ref);
4325         name_len = btrfs_inode_ref_name_len(node, ref);
4326         if (cur + sizeof(*ref) + name_len > total ||
4327             name_len > BTRFS_NAME_LEN) {
4328                 warning("root %llu INODE_REF[%llu %llu] name too long",
4329                         root->objectid, ref_key->objectid, ref_key->offset);
4330
4331                 if (total < cur + sizeof(*ref))
4332                         goto out;
4333                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4334         } else {
4335                 len = name_len;
4336         }
4337
4338         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4339
4340         /* Check root dir ref name */
4341         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4342                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4343                       root->objectid, ref_key->objectid, ref_key->offset,
4344                       namebuf);
4345                 err |= ROOT_DIR_ERROR;
4346         }
4347
4348         /* Find related DIR_INDEX */
4349         key.objectid = ref_key->offset;
4350         key.type = BTRFS_DIR_INDEX_KEY;
4351         key.offset = index;
4352         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4353         err |= ret;
4354
4355         /* Find related dir_item */
4356         key.objectid = ref_key->offset;
4357         key.type = BTRFS_DIR_ITEM_KEY;
4358         key.offset = btrfs_name_hash(namebuf, len);
4359         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4360         err |= ret;
4361
4362         len = sizeof(*ref) + name_len;
4363         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4364         cur += len;
4365         if (cur < total)
4366                 goto next;
4367
4368 out:
4369         return err;
4370 }
4371
4372 /*
4373  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4374  * DIR_ITEM/DIR_INDEX.
4375  *
4376  * @root:       the root of the fs/file tree
4377  * @ref_key:    the key of the INODE_EXTREF
4378  * @refs:       the count of INODE_EXTREF
4379  * @mode:       the st_mode of INODE_ITEM
4380  *
4381  * Return 0 if no error occurred.
4382  */
4383 static int check_inode_extref(struct btrfs_root *root,
4384                               struct btrfs_key *ref_key,
4385                               struct extent_buffer *node, int slot, u64 *refs,
4386                               int mode)
4387 {
4388         struct btrfs_key key;
4389         struct btrfs_inode_extref *extref;
4390         char namebuf[BTRFS_NAME_LEN] = {0};
4391         u32 total;
4392         u32 cur = 0;
4393         u32 len;
4394         u32 name_len;
4395         u64 index;
4396         u64 parent;
4397         int ret;
4398         int err = 0;
4399
4400         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4401         total = btrfs_item_size_nr(node, slot);
4402
4403 next:
4404         /* update inode ref count */
4405         (*refs)++;
4406         name_len = btrfs_inode_extref_name_len(node, extref);
4407         index = btrfs_inode_extref_index(node, extref);
4408         parent = btrfs_inode_extref_parent(node, extref);
4409         if (name_len <= BTRFS_NAME_LEN) {
4410                 len = name_len;
4411         } else {
4412                 len = BTRFS_NAME_LEN;
4413                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4414                         root->objectid, ref_key->objectid, ref_key->offset);
4415         }
4416         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4417
4418         /* Check root dir ref name */
4419         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4420                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4421                       root->objectid, ref_key->objectid, ref_key->offset,
4422                       namebuf);
4423                 err |= ROOT_DIR_ERROR;
4424         }
4425
4426         /* find related dir_index */
4427         key.objectid = parent;
4428         key.type = BTRFS_DIR_INDEX_KEY;
4429         key.offset = index;
4430         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4431         err |= ret;
4432
4433         /* find related dir_item */
4434         key.objectid = parent;
4435         key.type = BTRFS_DIR_ITEM_KEY;
4436         key.offset = btrfs_name_hash(namebuf, len);
4437         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4438         err |= ret;
4439
4440         len = sizeof(*extref) + name_len;
4441         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4442         cur += len;
4443
4444         if (cur < total)
4445                 goto next;
4446
4447         return err;
4448 }
4449
4450 /*
4451  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4452  * DIR_ITEM/DIR_INDEX match.
4453  *
4454  * @root:       the root of the fs/file tree
4455  * @key:        the key of the INODE_REF/INODE_EXTREF
4456  * @name:       the name in the INODE_REF/INODE_EXTREF
4457  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4458  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4459  * to (u64)-1
4460  * @ext_ref:    the EXTENDED_IREF feature
4461  *
4462  * Return 0 if no error occurred.
4463  * Return >0 for error bitmap
4464  */
4465 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4466                           char *name, int namelen, u64 index,
4467                           unsigned int ext_ref)
4468 {
4469         struct btrfs_path path;
4470         struct btrfs_inode_ref *ref;
4471         struct btrfs_inode_extref *extref;
4472         struct extent_buffer *node;
4473         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4474         u32 total;
4475         u32 cur = 0;
4476         u32 len;
4477         u32 ref_namelen;
4478         u64 ref_index;
4479         u64 parent;
4480         u64 dir_id;
4481         int slot;
4482         int ret;
4483
4484         btrfs_init_path(&path);
4485         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4486         if (ret) {
4487                 ret = INODE_REF_MISSING;
4488                 goto extref;
4489         }
4490
4491         node = path.nodes[0];
4492         slot = path.slots[0];
4493
4494         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4495         total = btrfs_item_size_nr(node, slot);
4496
4497         /* Iterate all entry of INODE_REF */
4498         while (cur < total) {
4499                 ret = INODE_REF_MISSING;
4500
4501                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4502                 ref_index = btrfs_inode_ref_index(node, ref);
4503                 if (index != (u64)-1 && index != ref_index)
4504                         goto next_ref;
4505
4506                 if (cur + sizeof(*ref) + ref_namelen > total ||
4507                     ref_namelen > BTRFS_NAME_LEN) {
4508                         warning("root %llu INODE %s[%llu %llu] name too long",
4509                                 root->objectid,
4510                                 key->type == BTRFS_INODE_REF_KEY ?
4511                                         "REF" : "EXTREF",
4512                                 key->objectid, key->offset);
4513
4514                         if (cur + sizeof(*ref) > total)
4515                                 break;
4516                         len = min_t(u32, total - cur - sizeof(*ref),
4517                                     BTRFS_NAME_LEN);
4518                 } else {
4519                         len = ref_namelen;
4520                 }
4521
4522                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4523                                    len);
4524
4525                 if (len != namelen || strncmp(ref_namebuf, name, len))
4526                         goto next_ref;
4527
4528                 ret = 0;
4529                 goto out;
4530 next_ref:
4531                 len = sizeof(*ref) + ref_namelen;
4532                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4533                 cur += len;
4534         }
4535
4536 extref:
4537         /* Skip if not support EXTENDED_IREF feature */
4538         if (!ext_ref)
4539                 goto out;
4540
4541         btrfs_release_path(&path);
4542         btrfs_init_path(&path);
4543
4544         dir_id = key->offset;
4545         key->type = BTRFS_INODE_EXTREF_KEY;
4546         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4547
4548         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4549         if (ret) {
4550                 ret = INODE_REF_MISSING;
4551                 goto out;
4552         }
4553
4554         node = path.nodes[0];
4555         slot = path.slots[0];
4556
4557         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4558         cur = 0;
4559         total = btrfs_item_size_nr(node, slot);
4560
4561         /* Iterate all entry of INODE_EXTREF */
4562         while (cur < total) {
4563                 ret = INODE_REF_MISSING;
4564
4565                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4566                 ref_index = btrfs_inode_extref_index(node, extref);
4567                 parent = btrfs_inode_extref_parent(node, extref);
4568                 if (index != (u64)-1 && index != ref_index)
4569                         goto next_extref;
4570
4571                 if (parent != dir_id)
4572                         goto next_extref;
4573
4574                 if (ref_namelen <= BTRFS_NAME_LEN) {
4575                         len = ref_namelen;
4576                 } else {
4577                         len = BTRFS_NAME_LEN;
4578                         warning("root %llu INODE %s[%llu %llu] name too long",
4579                                 root->objectid,
4580                                 key->type == BTRFS_INODE_REF_KEY ?
4581                                         "REF" : "EXTREF",
4582                                 key->objectid, key->offset);
4583                 }
4584                 read_extent_buffer(node, ref_namebuf,
4585                                    (unsigned long)(extref + 1), len);
4586
4587                 if (len != namelen || strncmp(ref_namebuf, name, len))
4588                         goto next_extref;
4589
4590                 ret = 0;
4591                 goto out;
4592
4593 next_extref:
4594                 len = sizeof(*extref) + ref_namelen;
4595                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4596                 cur += len;
4597
4598         }
4599 out:
4600         btrfs_release_path(&path);
4601         return ret;
4602 }
4603
4604 /*
4605  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4606  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4607  *
4608  * @root:       the root of the fs/file tree
 * @key:        the key of the DIR_ITEM/DIR_INDEX
4610  * @size:       the st_size of the INODE_ITEM
4611  * @ext_ref:    the EXTENDED_IREF feature
4612  *
4613  * Return 0 if no error occurred.
4614  */
4615 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4616                           struct extent_buffer *node, int slot, u64 *size,
4617                           unsigned int ext_ref)
4618 {
4619         struct btrfs_dir_item *di;
4620         struct btrfs_inode_item *ii;
4621         struct btrfs_path path;
4622         struct btrfs_key location;
4623         char namebuf[BTRFS_NAME_LEN] = {0};
4624         u32 total;
4625         u32 cur = 0;
4626         u32 len;
4627         u32 name_len;
4628         u32 data_len;
4629         u8 filetype;
4630         u32 mode;
4631         u64 index;
4632         int ret;
4633         int err = 0;
4634
4635         /*
4636          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4637          * ignore index check.
4638          */
4639         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4640
4641         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4642         total = btrfs_item_size_nr(node, slot);
4643
4644         while (cur < total) {
4645                 data_len = btrfs_dir_data_len(node, di);
4646                 if (data_len)
4647                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4648                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4649                               "DIR_ITEM" : "DIR_INDEX",
4650                               key->objectid, key->offset, data_len);
4651
4652                 name_len = btrfs_dir_name_len(node, di);
4653                 if (cur + sizeof(*di) + name_len > total ||
4654                     name_len > BTRFS_NAME_LEN) {
4655                         warning("root %llu %s[%llu %llu] name too long",
4656                                 root->objectid,
4657                                 key->type == BTRFS_DIR_ITEM_KEY ?
4658                                 "DIR_ITEM" : "DIR_INDEX",
4659                                 key->objectid, key->offset);
4660
4661                         if (cur + sizeof(*di) > total)
4662                                 break;
4663                         len = min_t(u32, total - cur - sizeof(*di),
4664                                     BTRFS_NAME_LEN);
4665                 } else {
4666                         len = name_len;
4667                 }
4668                 (*size) += name_len;
4669
4670                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4671                 filetype = btrfs_dir_type(node, di);
4672
4673                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4674                     key->offset != btrfs_name_hash(namebuf, len)) {
4675                         err |= -EIO;
4676                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4677                                 root->objectid, key->objectid, key->offset,
4678                                 namebuf, len, filetype, key->offset,
4679                                 btrfs_name_hash(namebuf, len));
4680                 }
4681
4682                 btrfs_init_path(&path);
4683                 btrfs_dir_item_key_to_cpu(node, di, &location);
4684
4685                 /* Ignore related ROOT_ITEM check */
4686                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4687                         goto next;
4688
4689                 /* Check relative INODE_ITEM(existence/filetype) */
4690                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4691                 if (ret) {
4692                         err |= INODE_ITEM_MISSING;
4693                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4694                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4695                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4696                               key->offset, location.objectid, name_len,
4697                               namebuf, filetype);
4698                         goto next;
4699                 }
4700
4701                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4702                                     struct btrfs_inode_item);
4703                 mode = btrfs_inode_mode(path.nodes[0], ii);
4704
4705                 if (imode_to_type(mode) != filetype) {
4706                         err |= INODE_ITEM_MISMATCH;
4707                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4708                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4709                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4710                               key->offset, name_len, namebuf, filetype);
4711                 }
4712
4713                 /* Check relative INODE_REF/INODE_EXTREF */
4714                 location.type = BTRFS_INODE_REF_KEY;
4715                 location.offset = key->objectid;
4716                 ret = find_inode_ref(root, &location, namebuf, len,
4717                                        index, ext_ref);
4718                 err |= ret;
4719                 if (ret & INODE_REF_MISSING)
4720                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4721                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4722                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4723                               key->offset, name_len, namebuf, filetype);
4724
4725 next:
4726                 btrfs_release_path(&path);
4727                 len = sizeof(*di) + name_len + data_len;
4728                 di = (struct btrfs_dir_item *)((char *)di + len);
4729                 cur += len;
4730
4731                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4732                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4733                               root->objectid, key->objectid, key->offset);
4734                         break;
4735                 }
4736         }
4737
4738         return err;
4739 }
4740
4741 /*
4742  * Check file extent datasum/hole, update the size of the file extents,
4743  * check and update the last offset of the file extent.
4744  *
4745  * @root:       the root of fs/file tree.
4746  * @fkey:       the key of the file extent.
4747  * @nodatasum:  INODE_NODATASUM feature.
4748  * @size:       the sum of all EXTENT_DATA items size for this inode.
4749  * @end:        the offset of the last extent.
4750  *
4751  * Return 0 if no error occurred.
4752  */
4753 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4754                              struct extent_buffer *node, int slot,
4755                              unsigned int nodatasum, u64 *size, u64 *end)
4756 {
4757         struct btrfs_file_extent_item *fi;
4758         u64 disk_bytenr;
4759         u64 disk_num_bytes;
4760         u64 extent_num_bytes;
4761         u64 extent_offset;
4762         u64 csum_found;         /* In byte size, sectorsize aligned */
4763         u64 search_start;       /* Logical range start we search for csum */
4764         u64 search_len;         /* Logical range len we search for csum */
4765         unsigned int extent_type;
4766         unsigned int is_hole;
4767         int compressed = 0;
4768         int ret;
4769         int err = 0;
4770
4771         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4772
4773         /* Check inline extent */
4774         extent_type = btrfs_file_extent_type(node, fi);
4775         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4776                 struct btrfs_item *e = btrfs_item_nr(slot);
4777                 u32 item_inline_len;
4778
4779                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4780                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4781                 compressed = btrfs_file_extent_compression(node, fi);
4782                 if (extent_num_bytes == 0) {
4783                         error(
4784                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4785                                 root->objectid, fkey->objectid, fkey->offset);
4786                         err |= FILE_EXTENT_ERROR;
4787                 }
4788                 if (!compressed && extent_num_bytes != item_inline_len) {
4789                         error(
4790                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4791                                 root->objectid, fkey->objectid, fkey->offset,
4792                                 extent_num_bytes, item_inline_len);
4793                         err |= FILE_EXTENT_ERROR;
4794                 }
4795                 *end += extent_num_bytes;
4796                 *size += extent_num_bytes;
4797                 return err;
4798         }
4799
4800         /* Check extent type */
4801         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4802                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4803                 err |= FILE_EXTENT_ERROR;
4804                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4805                       root->objectid, fkey->objectid, fkey->offset);
4806                 return err;
4807         }
4808
4809         /* Check REG_EXTENT/PREALLOC_EXTENT */
4810         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4811         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4812         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4813         extent_offset = btrfs_file_extent_offset(node, fi);
4814         compressed = btrfs_file_extent_compression(node, fi);
4815         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4816
4817         /*
4818          * Check EXTENT_DATA csum
4819          *
4820          * For plain (uncompressed) extent, we should only check the range
4821          * we're referring to, as it's possible that part of prealloc extent
4822          * has been written, and has csum:
4823          *
4824          * |<--- Original large preallocated extent A ---->|
4825          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4826          *      No csum                         Has csum
4827          *
4828          * For compressed extent, we should check the whole range.
4829          */
4830         if (!compressed) {
4831                 search_start = disk_bytenr + extent_offset;
4832                 search_len = extent_num_bytes;
4833         } else {
4834                 search_start = disk_bytenr;
4835                 search_len = disk_num_bytes;
4836         }
4837         ret = count_csum_range(root, search_start, search_len, &csum_found);
4838         if (csum_found > 0 && nodatasum) {
4839                 err |= ODD_CSUM_ITEM;
4840                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4841                       root->objectid, fkey->objectid, fkey->offset);
4842         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4843                    !is_hole && (ret < 0 || csum_found < search_len)) {
4844                 err |= CSUM_ITEM_MISSING;
4845                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4846                       root->objectid, fkey->objectid, fkey->offset,
4847                       csum_found, search_len);
4848         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4849                 err |= ODD_CSUM_ITEM;
4850                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4851                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4852         }
4853
4854         /* Check EXTENT_DATA hole */
4855         if (!no_holes && *end != fkey->offset) {
4856                 err |= FILE_EXTENT_ERROR;
4857                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4858                       root->objectid, fkey->objectid, fkey->offset);
4859         }
4860
4861         *end += extent_num_bytes;
4862         if (!is_hole)
4863                 *size += extent_num_bytes;
4864
4865         return err;
4866 }
4867
4868 /*
4869  * Check INODE_ITEM and related ITEMs (the same inode number)
4870  * 1. check link count
4871  * 2. check inode ref/extref
4872  * 3. check dir item/index
4873  *
4874  * @ext_ref:    the EXTENDED_IREF feature
4875  *
4876  * Return 0 if no error occurred.
4877  * Return >0 for error or hit the traversal is done(by error bitmap)
4878  */
4879 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4880                             unsigned int ext_ref)
4881 {
4882         struct extent_buffer *node;
4883         struct btrfs_inode_item *ii;
4884         struct btrfs_key key;
4885         u64 inode_id;
4886         u32 mode;
4887         u64 nlink;
4888         u64 nbytes;
4889         u64 isize;
4890         u64 size = 0;
4891         u64 refs = 0;
4892         u64 extent_end = 0;
4893         u64 extent_size = 0;
4894         unsigned int dir;
4895         unsigned int nodatasum;
4896         int slot;
4897         int ret;
4898         int err = 0;
4899
4900         node = path->nodes[0];
4901         slot = path->slots[0];
4902
4903         btrfs_item_key_to_cpu(node, &key, slot);
4904         inode_id = key.objectid;
4905
4906         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4907                 ret = btrfs_next_item(root, path);
4908                 if (ret > 0)
4909                         err |= LAST_ITEM;
4910                 return err;
4911         }
4912
4913         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4914         isize = btrfs_inode_size(node, ii);
4915         nbytes = btrfs_inode_nbytes(node, ii);
4916         mode = btrfs_inode_mode(node, ii);
4917         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4918         nlink = btrfs_inode_nlink(node, ii);
4919         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4920
4921         while (1) {
4922                 ret = btrfs_next_item(root, path);
4923                 if (ret < 0) {
4924                         /* out will fill 'err' rusing current statistics */
4925                         goto out;
4926                 } else if (ret > 0) {
4927                         err |= LAST_ITEM;
4928                         goto out;
4929                 }
4930
4931                 node = path->nodes[0];
4932                 slot = path->slots[0];
4933                 btrfs_item_key_to_cpu(node, &key, slot);
4934                 if (key.objectid != inode_id)
4935                         goto out;
4936
4937                 switch (key.type) {
4938                 case BTRFS_INODE_REF_KEY:
4939                         ret = check_inode_ref(root, &key, node, slot, &refs,
4940                                               mode);
4941                         err |= ret;
4942                         break;
4943                 case BTRFS_INODE_EXTREF_KEY:
4944                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4945                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4946                                         root->objectid, key.objectid,
4947                                         key.offset);
4948                         ret = check_inode_extref(root, &key, node, slot, &refs,
4949                                                  mode);
4950                         err |= ret;
4951                         break;
4952                 case BTRFS_DIR_ITEM_KEY:
4953                 case BTRFS_DIR_INDEX_KEY:
4954                         if (!dir) {
4955                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4956                                         root->objectid, inode_id,
4957                                         imode_to_type(mode), key.objectid,
4958                                         key.offset);
4959                         }
4960                         ret = check_dir_item(root, &key, node, slot, &size,
4961                                              ext_ref);
4962                         err |= ret;
4963                         break;
4964                 case BTRFS_EXTENT_DATA_KEY:
4965                         if (dir) {
4966                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4967                                         root->objectid, inode_id, key.objectid,
4968                                         key.offset);
4969                         }
4970                         ret = check_file_extent(root, &key, node, slot,
4971                                                 nodatasum, &extent_size,
4972                                                 &extent_end);
4973                         err |= ret;
4974                         break;
4975                 case BTRFS_XATTR_ITEM_KEY:
4976                         break;
4977                 default:
4978                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4979                               key.objectid, key.type, key.offset);
4980                 }
4981         }
4982
4983 out:
4984         /* verify INODE_ITEM nlink/isize/nbytes */
4985         if (dir) {
4986                 if (nlink != 1) {
4987                         err |= LINK_COUNT_ERROR;
4988                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4989                               root->objectid, inode_id, nlink);
4990                 }
4991
4992                 /*
4993                  * Just a warning, as dir inode nbytes is just an
4994                  * instructive value.
4995                  */
4996                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4997                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4998                                 root->objectid, inode_id,
4999                                 root->fs_info->nodesize);
5000                 }
5001
5002                 if (isize != size) {
5003                         err |= ISIZE_ERROR;
5004                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5005                               root->objectid, inode_id, isize, size);
5006                 }
5007         } else {
5008                 if (nlink != refs) {
5009                         err |= LINK_COUNT_ERROR;
5010                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5011                               root->objectid, inode_id, nlink, refs);
5012                 } else if (!nlink) {
5013                         err |= ORPHAN_ITEM;
5014                 }
5015
5016                 if (!nbytes && !no_holes && extent_end < isize) {
5017                         err |= NBYTES_ERROR;
5018                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5019                               root->objectid, inode_id, isize);
5020                 }
5021
5022                 if (nbytes != extent_size) {
5023                         err |= NBYTES_ERROR;
5024                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5025                               root->objectid, inode_id, nbytes, extent_size);
5026                 }
5027         }
5028
5029         return err;
5030 }
5031
5032 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5033 {
5034         struct btrfs_path path;
5035         struct btrfs_key key;
5036         int err = 0;
5037         int ret;
5038
5039         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5040         key.type = BTRFS_INODE_ITEM_KEY;
5041         key.offset = 0;
5042
5043         /* For root being dropped, we don't need to check first inode */
5044         if (btrfs_root_refs(&root->root_item) == 0 &&
5045             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5046             key.objectid)
5047                 return 0;
5048
5049         btrfs_init_path(&path);
5050
5051         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5052         if (ret < 0)
5053                 goto out;
5054         if (ret > 0) {
5055                 ret = 0;
5056                 err |= INODE_ITEM_MISSING;
5057                 error("first inode item of root %llu is missing",
5058                       root->objectid);
5059         }
5060
5061         err |= check_inode_item(root, &path, ext_ref);
5062         err &= ~LAST_ITEM;
5063         if (err && !ret)
5064                 ret = -EIO;
5065 out:
5066         btrfs_release_path(&path);
5067         return ret;
5068 }
5069
5070 /*
5071  * Iterate all item on the tree and call check_inode_item() to check.
5072  *
5073  * @root:       the root of the tree to be checked.
5074  * @ext_ref:    the EXTENDED_IREF feature
5075  *
5076  * Return 0 if no error found.
5077  * Return <0 for error.
5078  */
5079 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5080 {
5081         struct btrfs_path path;
5082         struct node_refs nrefs;
5083         struct btrfs_root_item *root_item = &root->root_item;
5084         int ret;
5085         int level;
5086         int err = 0;
5087
5088         /*
5089          * We need to manually check the first inode item(256)
5090          * As the following traversal function will only start from
5091          * the first inode item in the leaf, if inode item(256) is missing
5092          * we will just skip it forever.
5093          */
5094         ret = check_fs_first_inode(root, ext_ref);
5095         if (ret < 0)
5096                 return ret;
5097
5098         memset(&nrefs, 0, sizeof(nrefs));
5099         level = btrfs_header_level(root->node);
5100         btrfs_init_path(&path);
5101
5102         if (btrfs_root_refs(root_item) > 0 ||
5103             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5104                 path.nodes[level] = root->node;
5105                 path.slots[level] = 0;
5106                 extent_buffer_get(root->node);
5107         } else {
5108                 struct btrfs_key key;
5109
5110                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5111                 level = root_item->drop_level;
5112                 path.lowest_level = level;
5113                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5114                 if (ret < 0)
5115                         goto out;
5116                 ret = 0;
5117         }
5118
5119         while (1) {
5120                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5121                 err |= !!ret;
5122
5123                 /* if ret is negative, walk shall stop */
5124                 if (ret < 0) {
5125                         ret = err;
5126                         break;
5127                 }
5128
5129                 ret = walk_up_tree_v2(root, &path, &level);
5130                 if (ret != 0) {
5131                         /* Normal exit, reset ret to err */
5132                         ret = err;
5133                         break;
5134                 }
5135         }
5136
5137 out:
5138         btrfs_release_path(&path);
5139         return ret;
5140 }
5141
5142 /*
5143  * Find the relative ref for root_ref and root_backref.
5144  *
5145  * @root:       the root of the root tree.
5146  * @ref_key:    the key of the root ref.
5147  *
5148  * Return 0 if no error occurred.
5149  */
5150 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5151                           struct extent_buffer *node, int slot)
5152 {
5153         struct btrfs_path path;
5154         struct btrfs_key key;
5155         struct btrfs_root_ref *ref;
5156         struct btrfs_root_ref *backref;
5157         char ref_name[BTRFS_NAME_LEN] = {0};
5158         char backref_name[BTRFS_NAME_LEN] = {0};
5159         u64 ref_dirid;
5160         u64 ref_seq;
5161         u32 ref_namelen;
5162         u64 backref_dirid;
5163         u64 backref_seq;
5164         u32 backref_namelen;
5165         u32 len;
5166         int ret;
5167         int err = 0;
5168
5169         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5170         ref_dirid = btrfs_root_ref_dirid(node, ref);
5171         ref_seq = btrfs_root_ref_sequence(node, ref);
5172         ref_namelen = btrfs_root_ref_name_len(node, ref);
5173
5174         if (ref_namelen <= BTRFS_NAME_LEN) {
5175                 len = ref_namelen;
5176         } else {
5177                 len = BTRFS_NAME_LEN;
5178                 warning("%s[%llu %llu] ref_name too long",
5179                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5180                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5181                         ref_key->offset);
5182         }
5183         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5184
5185         /* Find relative root_ref */
5186         key.objectid = ref_key->offset;
5187         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5188         key.offset = ref_key->objectid;
5189
5190         btrfs_init_path(&path);
5191         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5192         if (ret) {
5193                 err |= ROOT_REF_MISSING;
5194                 error("%s[%llu %llu] couldn't find relative ref",
5195                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5196                       "ROOT_REF" : "ROOT_BACKREF",
5197                       ref_key->objectid, ref_key->offset);
5198                 goto out;
5199         }
5200
5201         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5202                                  struct btrfs_root_ref);
5203         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5204         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5205         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5206
5207         if (backref_namelen <= BTRFS_NAME_LEN) {
5208                 len = backref_namelen;
5209         } else {
5210                 len = BTRFS_NAME_LEN;
5211                 warning("%s[%llu %llu] ref_name too long",
5212                         key.type == BTRFS_ROOT_REF_KEY ?
5213                         "ROOT_REF" : "ROOT_BACKREF",
5214                         key.objectid, key.offset);
5215         }
5216         read_extent_buffer(path.nodes[0], backref_name,
5217                            (unsigned long)(backref + 1), len);
5218
5219         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5220             ref_namelen != backref_namelen ||
5221             strncmp(ref_name, backref_name, len)) {
5222                 err |= ROOT_REF_MISMATCH;
5223                 error("%s[%llu %llu] mismatch relative ref",
5224                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5225                       "ROOT_REF" : "ROOT_BACKREF",
5226                       ref_key->objectid, ref_key->offset);
5227         }
5228 out:
5229         btrfs_release_path(&path);
5230         return err;
5231 }
5232
5233 /*
5234  * Check all fs/file tree in low_memory mode.
5235  *
5236  * 1. for fs tree root item, call check_fs_root_v2()
5237  * 2. for fs tree root ref/backref, call check_root_ref()
5238  *
5239  * Return 0 if no error occurred.
5240  */
5241 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5242 {
5243         struct btrfs_root *tree_root = fs_info->tree_root;
5244         struct btrfs_root *cur_root = NULL;
5245         struct btrfs_path path;
5246         struct btrfs_key key;
5247         struct extent_buffer *node;
5248         unsigned int ext_ref;
5249         int slot;
5250         int ret;
5251         int err = 0;
5252
5253         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5254
5255         btrfs_init_path(&path);
5256         key.objectid = BTRFS_FS_TREE_OBJECTID;
5257         key.offset = 0;
5258         key.type = BTRFS_ROOT_ITEM_KEY;
5259
5260         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5261         if (ret < 0) {
5262                 err = ret;
5263                 goto out;
5264         } else if (ret > 0) {
5265                 err = -ENOENT;
5266                 goto out;
5267         }
5268
5269         while (1) {
5270                 node = path.nodes[0];
5271                 slot = path.slots[0];
5272                 btrfs_item_key_to_cpu(node, &key, slot);
5273                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5274                         goto out;
5275                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5276                     fs_root_objectid(key.objectid)) {
5277                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5278                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5279                                                                        &key);
5280                         } else {
5281                                 key.offset = (u64)-1;
5282                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5283                         }
5284
5285                         if (IS_ERR(cur_root)) {
5286                                 error("Fail to read fs/subvol tree: %lld",
5287                                       key.objectid);
5288                                 err = -EIO;
5289                                 goto next;
5290                         }
5291
5292                         ret = check_fs_root_v2(cur_root, ext_ref);
5293                         err |= ret;
5294
5295                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5296                                 btrfs_free_fs_root(cur_root);
5297                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5298                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5299                         ret = check_root_ref(tree_root, &key, node, slot);
5300                         err |= ret;
5301                 }
5302 next:
5303                 ret = btrfs_next_item(tree_root, &path);
5304                 if (ret > 0)
5305                         goto out;
5306                 if (ret < 0) {
5307                         err = ret;
5308                         goto out;
5309                 }
5310         }
5311
5312 out:
5313         btrfs_release_path(&path);
5314         return err;
5315 }
5316
5317 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5318                           struct cache_tree *root_cache)
5319 {
5320         int ret;
5321
5322         if (!ctx.progress_enabled)
5323                 fprintf(stderr, "checking fs roots\n");
5324         if (check_mode == CHECK_MODE_LOWMEM)
5325                 ret = check_fs_roots_v2(fs_info);
5326         else
5327                 ret = check_fs_roots(fs_info, root_cache);
5328
5329         return ret;
5330 }
5331
5332 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5333 {
5334         struct list_head *cur = rec->backrefs.next;
5335         struct extent_backref *back;
5336         struct tree_backref *tback;
5337         struct data_backref *dback;
5338         u64 found = 0;
5339         int err = 0;
5340
5341         while(cur != &rec->backrefs) {
5342                 back = to_extent_backref(cur);
5343                 cur = cur->next;
5344                 if (!back->found_extent_tree) {
5345                         err = 1;
5346                         if (!print_errs)
5347                                 goto out;
5348                         if (back->is_data) {
5349                                 dback = to_data_backref(back);
5350                                 fprintf(stderr, "Backref %llu %s %llu"
5351                                         " owner %llu offset %llu num_refs %lu"
5352                                         " not found in extent tree\n",
5353                                         (unsigned long long)rec->start,
5354                                         back->full_backref ?
5355                                         "parent" : "root",
5356                                         back->full_backref ?
5357                                         (unsigned long long)dback->parent:
5358                                         (unsigned long long)dback->root,
5359                                         (unsigned long long)dback->owner,
5360                                         (unsigned long long)dback->offset,
5361                                         (unsigned long)dback->num_refs);
5362                         } else {
5363                                 tback = to_tree_backref(back);
5364                                 fprintf(stderr, "Backref %llu parent %llu"
5365                                         " root %llu not found in extent tree\n",
5366                                         (unsigned long long)rec->start,
5367                                         (unsigned long long)tback->parent,
5368                                         (unsigned long long)tback->root);
5369                         }
5370                 }
5371                 if (!back->is_data && !back->found_ref) {
5372                         err = 1;
5373                         if (!print_errs)
5374                                 goto out;
5375                         tback = to_tree_backref(back);
5376                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5377                                 (unsigned long long)rec->start,
5378                                 back->full_backref ? "parent" : "root",
5379                                 back->full_backref ?
5380                                 (unsigned long long)tback->parent :
5381                                 (unsigned long long)tback->root, back);
5382                 }
5383                 if (back->is_data) {
5384                         dback = to_data_backref(back);
5385                         if (dback->found_ref != dback->num_refs) {
5386                                 err = 1;
5387                                 if (!print_errs)
5388                                         goto out;
5389                                 fprintf(stderr, "Incorrect local backref count"
5390                                         " on %llu %s %llu owner %llu"
5391                                         " offset %llu found %u wanted %u back %p\n",
5392                                         (unsigned long long)rec->start,
5393                                         back->full_backref ?
5394                                         "parent" : "root",
5395                                         back->full_backref ?
5396                                         (unsigned long long)dback->parent:
5397                                         (unsigned long long)dback->root,
5398                                         (unsigned long long)dback->owner,
5399                                         (unsigned long long)dback->offset,
5400                                         dback->found_ref, dback->num_refs, back);
5401                         }
5402                         if (dback->disk_bytenr != rec->start) {
5403                                 err = 1;
5404                                 if (!print_errs)
5405                                         goto out;
5406                                 fprintf(stderr, "Backref disk bytenr does not"
5407                                         " match extent record, bytenr=%llu, "
5408                                         "ref bytenr=%llu\n",
5409                                         (unsigned long long)rec->start,
5410                                         (unsigned long long)dback->disk_bytenr);
5411                         }
5412
5413                         if (dback->bytes != rec->nr) {
5414                                 err = 1;
5415                                 if (!print_errs)
5416                                         goto out;
5417                                 fprintf(stderr, "Backref bytes do not match "
5418                                         "extent backref, bytenr=%llu, ref "
5419                                         "bytes=%llu, backref bytes=%llu\n",
5420                                         (unsigned long long)rec->start,
5421                                         (unsigned long long)rec->nr,
5422                                         (unsigned long long)dback->bytes);
5423                         }
5424                 }
5425                 if (!back->is_data) {
5426                         found += 1;
5427                 } else {
5428                         dback = to_data_backref(back);
5429                         found += dback->found_ref;
5430                 }
5431         }
5432         if (found != rec->refs) {
5433                 err = 1;
5434                 if (!print_errs)
5435                         goto out;
5436                 fprintf(stderr, "Incorrect global backref count "
5437                         "on %llu found %llu wanted %llu\n",
5438                         (unsigned long long)rec->start,
5439                         (unsigned long long)found,
5440                         (unsigned long long)rec->refs);
5441         }
5442 out:
5443         return err;
5444 }
5445
5446 static int free_all_extent_backrefs(struct extent_record *rec)
5447 {
5448         struct extent_backref *back;
5449         struct list_head *cur;
5450         while (!list_empty(&rec->backrefs)) {
5451                 cur = rec->backrefs.next;
5452                 back = to_extent_backref(cur);
5453                 list_del(cur);
5454                 free(back);
5455         }
5456         return 0;
5457 }
5458
5459 static void free_extent_record_cache(struct cache_tree *extent_cache)
5460 {
5461         struct cache_extent *cache;
5462         struct extent_record *rec;
5463
5464         while (1) {
5465                 cache = first_cache_extent(extent_cache);
5466                 if (!cache)
5467                         break;
5468                 rec = container_of(cache, struct extent_record, cache);
5469                 remove_cache_extent(extent_cache, cache);
5470                 free_all_extent_backrefs(rec);
5471                 free(rec);
5472         }
5473 }
5474
5475 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5476                                  struct extent_record *rec)
5477 {
5478         if (rec->content_checked && rec->owner_ref_checked &&
5479             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5480             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5481             !rec->bad_full_backref && !rec->crossing_stripes &&
5482             !rec->wrong_chunk_type) {
5483                 remove_cache_extent(extent_cache, &rec->cache);
5484                 free_all_extent_backrefs(rec);
5485                 list_del_init(&rec->list);
5486                 free(rec);
5487         }
5488         return 0;
5489 }
5490
5491 static int check_owner_ref(struct btrfs_root *root,
5492                             struct extent_record *rec,
5493                             struct extent_buffer *buf)
5494 {
5495         struct extent_backref *node;
5496         struct tree_backref *back;
5497         struct btrfs_root *ref_root;
5498         struct btrfs_key key;
5499         struct btrfs_path path;
5500         struct extent_buffer *parent;
5501         int level;
5502         int found = 0;
5503         int ret;
5504
5505         list_for_each_entry(node, &rec->backrefs, list) {
5506                 if (node->is_data)
5507                         continue;
5508                 if (!node->found_ref)
5509                         continue;
5510                 if (node->full_backref)
5511                         continue;
5512                 back = to_tree_backref(node);
5513                 if (btrfs_header_owner(buf) == back->root)
5514                         return 0;
5515         }
5516         BUG_ON(rec->is_root);
5517
5518         /* try to find the block by search corresponding fs tree */
5519         key.objectid = btrfs_header_owner(buf);
5520         key.type = BTRFS_ROOT_ITEM_KEY;
5521         key.offset = (u64)-1;
5522
5523         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5524         if (IS_ERR(ref_root))
5525                 return 1;
5526
5527         level = btrfs_header_level(buf);
5528         if (level == 0)
5529                 btrfs_item_key_to_cpu(buf, &key, 0);
5530         else
5531                 btrfs_node_key_to_cpu(buf, &key, 0);
5532
5533         btrfs_init_path(&path);
5534         path.lowest_level = level + 1;
5535         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5536         if (ret < 0)
5537                 return 0;
5538
5539         parent = path.nodes[level + 1];
5540         if (parent && buf->start == btrfs_node_blockptr(parent,
5541                                                         path.slots[level + 1]))
5542                 found = 1;
5543
5544         btrfs_release_path(&path);
5545         return found ? 0 : 1;
5546 }
5547
5548 static int is_extent_tree_record(struct extent_record *rec)
5549 {
5550         struct list_head *cur = rec->backrefs.next;
5551         struct extent_backref *node;
5552         struct tree_backref *back;
5553         int is_extent = 0;
5554
5555         while(cur != &rec->backrefs) {
5556                 node = to_extent_backref(cur);
5557                 cur = cur->next;
5558                 if (node->is_data)
5559                         return 0;
5560                 back = to_tree_backref(node);
5561                 if (node->full_backref)
5562                         return 0;
5563                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5564                         is_extent = 1;
5565         }
5566         return is_extent;
5567 }
5568
5569
5570 static int record_bad_block_io(struct btrfs_fs_info *info,
5571                                struct cache_tree *extent_cache,
5572                                u64 start, u64 len)
5573 {
5574         struct extent_record *rec;
5575         struct cache_extent *cache;
5576         struct btrfs_key key;
5577
5578         cache = lookup_cache_extent(extent_cache, start, len);
5579         if (!cache)
5580                 return 0;
5581
5582         rec = container_of(cache, struct extent_record, cache);
5583         if (!is_extent_tree_record(rec))
5584                 return 0;
5585
5586         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5587         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5588 }
5589
5590 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5591                        struct extent_buffer *buf, int slot)
5592 {
5593         if (btrfs_header_level(buf)) {
5594                 struct btrfs_key_ptr ptr1, ptr2;
5595
5596                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5597                                    sizeof(struct btrfs_key_ptr));
5598                 read_extent_buffer(buf, &ptr2,
5599                                    btrfs_node_key_ptr_offset(slot + 1),
5600                                    sizeof(struct btrfs_key_ptr));
5601                 write_extent_buffer(buf, &ptr1,
5602                                     btrfs_node_key_ptr_offset(slot + 1),
5603                                     sizeof(struct btrfs_key_ptr));
5604                 write_extent_buffer(buf, &ptr2,
5605                                     btrfs_node_key_ptr_offset(slot),
5606                                     sizeof(struct btrfs_key_ptr));
5607                 if (slot == 0) {
5608                         struct btrfs_disk_key key;
5609                         btrfs_node_key(buf, &key, 0);
5610                         btrfs_fixup_low_keys(root, path, &key,
5611                                              btrfs_header_level(buf) + 1);
5612                 }
5613         } else {
5614                 struct btrfs_item *item1, *item2;
5615                 struct btrfs_key k1, k2;
5616                 char *item1_data, *item2_data;
5617                 u32 item1_offset, item2_offset, item1_size, item2_size;
5618
5619                 item1 = btrfs_item_nr(slot);
5620                 item2 = btrfs_item_nr(slot + 1);
5621                 btrfs_item_key_to_cpu(buf, &k1, slot);
5622                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5623                 item1_offset = btrfs_item_offset(buf, item1);
5624                 item2_offset = btrfs_item_offset(buf, item2);
5625                 item1_size = btrfs_item_size(buf, item1);
5626                 item2_size = btrfs_item_size(buf, item2);
5627
5628                 item1_data = malloc(item1_size);
5629                 if (!item1_data)
5630                         return -ENOMEM;
5631                 item2_data = malloc(item2_size);
5632                 if (!item2_data) {
5633                         free(item1_data);
5634                         return -ENOMEM;
5635                 }
5636
5637                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5638                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5639
5640                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5641                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5642                 free(item1_data);
5643                 free(item2_data);
5644
5645                 btrfs_set_item_offset(buf, item1, item2_offset);
5646                 btrfs_set_item_offset(buf, item2, item1_offset);
5647                 btrfs_set_item_size(buf, item1, item2_size);
5648                 btrfs_set_item_size(buf, item2, item1_size);
5649
5650                 path->slots[0] = slot;
5651                 btrfs_set_item_key_unsafe(root, path, &k2);
5652                 path->slots[0] = slot + 1;
5653                 btrfs_set_item_key_unsafe(root, path, &k1);
5654         }
5655         return 0;
5656 }
5657
5658 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5659 {
5660         struct extent_buffer *buf;
5661         struct btrfs_key k1, k2;
5662         int i;
5663         int level = path->lowest_level;
5664         int ret = -EIO;
5665
5666         buf = path->nodes[level];
5667         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5668                 if (level) {
5669                         btrfs_node_key_to_cpu(buf, &k1, i);
5670                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5671                 } else {
5672                         btrfs_item_key_to_cpu(buf, &k1, i);
5673                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5674                 }
5675                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5676                         continue;
5677                 ret = swap_values(root, path, buf, i);
5678                 if (ret)
5679                         break;
5680                 btrfs_mark_buffer_dirty(buf);
5681                 i = 0;
5682         }
5683         return ret;
5684 }
5685
5686 static int delete_bogus_item(struct btrfs_root *root,
5687                              struct btrfs_path *path,
5688                              struct extent_buffer *buf, int slot)
5689 {
5690         struct btrfs_key key;
5691         int nritems = btrfs_header_nritems(buf);
5692
5693         btrfs_item_key_to_cpu(buf, &key, slot);
5694
5695         /* These are all the keys we can deal with missing. */
5696         if (key.type != BTRFS_DIR_INDEX_KEY &&
5697             key.type != BTRFS_EXTENT_ITEM_KEY &&
5698             key.type != BTRFS_METADATA_ITEM_KEY &&
5699             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5700             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5701                 return -1;
5702
5703         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5704                (unsigned long long)key.objectid, key.type,
5705                (unsigned long long)key.offset, slot, buf->start);
5706         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5707                               btrfs_item_nr_offset(slot + 1),
5708                               sizeof(struct btrfs_item) *
5709                               (nritems - slot - 1));
5710         btrfs_set_header_nritems(buf, nritems - 1);
5711         if (slot == 0) {
5712                 struct btrfs_disk_key disk_key;
5713
5714                 btrfs_item_key(buf, &disk_key, 0);
5715                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5716         }
5717         btrfs_mark_buffer_dirty(buf);
5718         return 0;
5719 }
5720
5721 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5722 {
5723         struct extent_buffer *buf;
5724         int i;
5725         int ret = 0;
5726
5727         /* We should only get this for leaves */
5728         BUG_ON(path->lowest_level);
5729         buf = path->nodes[0];
5730 again:
5731         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5732                 unsigned int shift = 0, offset;
5733
5734                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5735                     BTRFS_LEAF_DATA_SIZE(root)) {
5736                         if (btrfs_item_end_nr(buf, i) >
5737                             BTRFS_LEAF_DATA_SIZE(root)) {
5738                                 ret = delete_bogus_item(root, path, buf, i);
5739                                 if (!ret)
5740                                         goto again;
5741                                 fprintf(stderr, "item is off the end of the "
5742                                         "leaf, can't fix\n");
5743                                 ret = -EIO;
5744                                 break;
5745                         }
5746                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5747                                 btrfs_item_end_nr(buf, i);
5748                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5749                            btrfs_item_offset_nr(buf, i - 1)) {
5750                         if (btrfs_item_end_nr(buf, i) >
5751                             btrfs_item_offset_nr(buf, i - 1)) {
5752                                 ret = delete_bogus_item(root, path, buf, i);
5753                                 if (!ret)
5754                                         goto again;
5755                                 fprintf(stderr, "items overlap, can't fix\n");
5756                                 ret = -EIO;
5757                                 break;
5758                         }
5759                         shift = btrfs_item_offset_nr(buf, i - 1) -
5760                                 btrfs_item_end_nr(buf, i);
5761                 }
5762                 if (!shift)
5763                         continue;
5764
5765                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5766                        i, shift, (unsigned long long)buf->start);
5767                 offset = btrfs_item_offset_nr(buf, i);
5768                 memmove_extent_buffer(buf,
5769                                       btrfs_leaf_data(buf) + offset + shift,
5770                                       btrfs_leaf_data(buf) + offset,
5771                                       btrfs_item_size_nr(buf, i));
5772                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5773                                       offset + shift);
5774                 btrfs_mark_buffer_dirty(buf);
5775         }
5776
5777         /*
5778          * We may have moved things, in which case we want to exit so we don't
5779          * write those changes out.  Once we have proper abort functionality in
5780          * progs this can be changed to something nicer.
5781          */
5782         BUG_ON(ret);
5783         return ret;
5784 }
5785
5786 /*
5787  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5788  * then just return -EIO.
5789  */
5790 static int try_to_fix_bad_block(struct btrfs_root *root,
5791                                 struct extent_buffer *buf,
5792                                 enum btrfs_tree_block_status status)
5793 {
5794         struct btrfs_trans_handle *trans;
5795         struct ulist *roots;
5796         struct ulist_node *node;
5797         struct btrfs_root *search_root;
5798         struct btrfs_path path;
5799         struct ulist_iterator iter;
5800         struct btrfs_key root_key, key;
5801         int ret;
5802
5803         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5804             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5805                 return -EIO;
5806
5807         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5808         if (ret)
5809                 return -EIO;
5810
5811         btrfs_init_path(&path);
5812         ULIST_ITER_INIT(&iter);
5813         while ((node = ulist_next(roots, &iter))) {
5814                 root_key.objectid = node->val;
5815                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5816                 root_key.offset = (u64)-1;
5817
5818                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5819                 if (IS_ERR(root)) {
5820                         ret = -EIO;
5821                         break;
5822                 }
5823
5824
5825                 trans = btrfs_start_transaction(search_root, 0);
5826                 if (IS_ERR(trans)) {
5827                         ret = PTR_ERR(trans);
5828                         break;
5829                 }
5830
5831                 path.lowest_level = btrfs_header_level(buf);
5832                 path.skip_check_block = 1;
5833                 if (path.lowest_level)
5834                         btrfs_node_key_to_cpu(buf, &key, 0);
5835                 else
5836                         btrfs_item_key_to_cpu(buf, &key, 0);
5837                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5838                 if (ret) {
5839                         ret = -EIO;
5840                         btrfs_commit_transaction(trans, search_root);
5841                         break;
5842                 }
5843                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5844                         ret = fix_key_order(search_root, &path);
5845                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5846                         ret = fix_item_offset(search_root, &path);
5847                 if (ret) {
5848                         btrfs_commit_transaction(trans, search_root);
5849                         break;
5850                 }
5851                 btrfs_release_path(&path);
5852                 btrfs_commit_transaction(trans, search_root);
5853         }
5854         ulist_free(roots);
5855         btrfs_release_path(&path);
5856         return ret;
5857 }
5858
5859 static int check_block(struct btrfs_root *root,
5860                        struct cache_tree *extent_cache,
5861                        struct extent_buffer *buf, u64 flags)
5862 {
5863         struct extent_record *rec;
5864         struct cache_extent *cache;
5865         struct btrfs_key key;
5866         enum btrfs_tree_block_status status;
5867         int ret = 0;
5868         int level;
5869
5870         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5871         if (!cache)
5872                 return 1;
5873         rec = container_of(cache, struct extent_record, cache);
5874         rec->generation = btrfs_header_generation(buf);
5875
5876         level = btrfs_header_level(buf);
5877         if (btrfs_header_nritems(buf) > 0) {
5878
5879                 if (level == 0)
5880                         btrfs_item_key_to_cpu(buf, &key, 0);
5881                 else
5882                         btrfs_node_key_to_cpu(buf, &key, 0);
5883
5884                 rec->info_objectid = key.objectid;
5885         }
5886         rec->info_level = level;
5887
5888         if (btrfs_is_leaf(buf))
5889                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5890         else
5891                 status = btrfs_check_node(root, &rec->parent_key, buf);
5892
5893         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5894                 if (repair)
5895                         status = try_to_fix_bad_block(root, buf, status);
5896                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5897                         ret = -EIO;
5898                         fprintf(stderr, "bad block %llu\n",
5899                                 (unsigned long long)buf->start);
5900                 } else {
5901                         /*
5902                          * Signal to callers we need to start the scan over
5903                          * again since we'll have cowed blocks.
5904                          */
5905                         ret = -EAGAIN;
5906                 }
5907         } else {
5908                 rec->content_checked = 1;
5909                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5910                         rec->owner_ref_checked = 1;
5911                 else {
5912                         ret = check_owner_ref(root, rec, buf);
5913                         if (!ret)
5914                                 rec->owner_ref_checked = 1;
5915                 }
5916         }
5917         if (!ret)
5918                 maybe_free_extent_rec(extent_cache, rec);
5919         return ret;
5920 }
5921
5922 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5923                                                 u64 parent, u64 root)
5924 {
5925         struct list_head *cur = rec->backrefs.next;
5926         struct extent_backref *node;
5927         struct tree_backref *back;
5928
5929         while(cur != &rec->backrefs) {
5930                 node = to_extent_backref(cur);
5931                 cur = cur->next;
5932                 if (node->is_data)
5933                         continue;
5934                 back = to_tree_backref(node);
5935                 if (parent > 0) {
5936                         if (!node->full_backref)
5937                                 continue;
5938                         if (parent == back->parent)
5939                                 return back;
5940                 } else {
5941                         if (node->full_backref)
5942                                 continue;
5943                         if (back->root == root)
5944                                 return back;
5945                 }
5946         }
5947         return NULL;
5948 }
5949
5950 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5951                                                 u64 parent, u64 root)
5952 {
5953         struct tree_backref *ref = malloc(sizeof(*ref));
5954
5955         if (!ref)
5956                 return NULL;
5957         memset(&ref->node, 0, sizeof(ref->node));
5958         if (parent > 0) {
5959                 ref->parent = parent;
5960                 ref->node.full_backref = 1;
5961         } else {
5962                 ref->root = root;
5963                 ref->node.full_backref = 0;
5964         }
5965         list_add_tail(&ref->node.list, &rec->backrefs);
5966
5967         return ref;
5968 }
5969
5970 static struct data_backref *find_data_backref(struct extent_record *rec,
5971                                                 u64 parent, u64 root,
5972                                                 u64 owner, u64 offset,
5973                                                 int found_ref,
5974                                                 u64 disk_bytenr, u64 bytes)
5975 {
5976         struct list_head *cur = rec->backrefs.next;
5977         struct extent_backref *node;
5978         struct data_backref *back;
5979
5980         while(cur != &rec->backrefs) {
5981                 node = to_extent_backref(cur);
5982                 cur = cur->next;
5983                 if (!node->is_data)
5984                         continue;
5985                 back = to_data_backref(node);
5986                 if (parent > 0) {
5987                         if (!node->full_backref)
5988                                 continue;
5989                         if (parent == back->parent)
5990                                 return back;
5991                 } else {
5992                         if (node->full_backref)
5993                                 continue;
5994                         if (back->root == root && back->owner == owner &&
5995                             back->offset == offset) {
5996                                 if (found_ref && node->found_ref &&
5997                                     (back->bytes != bytes ||
5998                                     back->disk_bytenr != disk_bytenr))
5999                                         continue;
6000                                 return back;
6001                         }
6002                 }
6003         }
6004         return NULL;
6005 }
6006
6007 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6008                                                 u64 parent, u64 root,
6009                                                 u64 owner, u64 offset,
6010                                                 u64 max_size)
6011 {
6012         struct data_backref *ref = malloc(sizeof(*ref));
6013
6014         if (!ref)
6015                 return NULL;
6016         memset(&ref->node, 0, sizeof(ref->node));
6017         ref->node.is_data = 1;
6018
6019         if (parent > 0) {
6020                 ref->parent = parent;
6021                 ref->owner = 0;
6022                 ref->offset = 0;
6023                 ref->node.full_backref = 1;
6024         } else {
6025                 ref->root = root;
6026                 ref->owner = owner;
6027                 ref->offset = offset;
6028                 ref->node.full_backref = 0;
6029         }
6030         ref->bytes = max_size;
6031         ref->found_ref = 0;
6032         ref->num_refs = 0;
6033         list_add_tail(&ref->node.list, &rec->backrefs);
6034         if (max_size > rec->max_size)
6035                 rec->max_size = max_size;
6036         return ref;
6037 }
6038
6039 /* Check if the type of extent matches with its chunk */
6040 static void check_extent_type(struct extent_record *rec)
6041 {
6042         struct btrfs_block_group_cache *bg_cache;
6043
6044         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6045         if (!bg_cache)
6046                 return;
6047
6048         /* data extent, check chunk directly*/
6049         if (!rec->metadata) {
6050                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6051                         rec->wrong_chunk_type = 1;
6052                 return;
6053         }
6054
6055         /* metadata extent, check the obvious case first */
6056         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6057                                  BTRFS_BLOCK_GROUP_METADATA))) {
6058                 rec->wrong_chunk_type = 1;
6059                 return;
6060         }
6061
6062         /*
6063          * Check SYSTEM extent, as it's also marked as metadata, we can only
6064          * make sure it's a SYSTEM extent by its backref
6065          */
6066         if (!list_empty(&rec->backrefs)) {
6067                 struct extent_backref *node;
6068                 struct tree_backref *tback;
6069                 u64 bg_type;
6070
6071                 node = to_extent_backref(rec->backrefs.next);
6072                 if (node->is_data) {
6073                         /* tree block shouldn't have data backref */
6074                         rec->wrong_chunk_type = 1;
6075                         return;
6076                 }
6077                 tback = container_of(node, struct tree_backref, node);
6078
6079                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6080                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6081                 else
6082                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6083                 if (!(bg_cache->flags & bg_type))
6084                         rec->wrong_chunk_type = 1;
6085         }
6086 }
6087
6088 /*
6089  * Allocate a new extent record, fill default values from @tmpl and insert int
6090  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6091  * the cache, otherwise it fails.
6092  */
6093 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6094                 struct extent_record *tmpl)
6095 {
6096         struct extent_record *rec;
6097         int ret = 0;
6098
6099         BUG_ON(tmpl->max_size == 0);
6100         rec = malloc(sizeof(*rec));
6101         if (!rec)
6102                 return -ENOMEM;
6103         rec->start = tmpl->start;
6104         rec->max_size = tmpl->max_size;
6105         rec->nr = max(tmpl->nr, tmpl->max_size);
6106         rec->found_rec = tmpl->found_rec;
6107         rec->content_checked = tmpl->content_checked;
6108         rec->owner_ref_checked = tmpl->owner_ref_checked;
6109         rec->num_duplicates = 0;
6110         rec->metadata = tmpl->metadata;
6111         rec->flag_block_full_backref = FLAG_UNSET;
6112         rec->bad_full_backref = 0;
6113         rec->crossing_stripes = 0;
6114         rec->wrong_chunk_type = 0;
6115         rec->is_root = tmpl->is_root;
6116         rec->refs = tmpl->refs;
6117         rec->extent_item_refs = tmpl->extent_item_refs;
6118         rec->parent_generation = tmpl->parent_generation;
6119         INIT_LIST_HEAD(&rec->backrefs);
6120         INIT_LIST_HEAD(&rec->dups);
6121         INIT_LIST_HEAD(&rec->list);
6122         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6123         rec->cache.start = tmpl->start;
6124         rec->cache.size = tmpl->nr;
6125         ret = insert_cache_extent(extent_cache, &rec->cache);
6126         if (ret) {
6127                 free(rec);
6128                 return ret;
6129         }
6130         bytes_used += rec->nr;
6131
6132         if (tmpl->metadata)
6133                 rec->crossing_stripes = check_crossing_stripes(global_info,
6134                                 rec->start, global_info->nodesize);
6135         check_extent_type(rec);
6136         return ret;
6137 }
6138
6139 /*
6140  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6141  * some are hints:
6142  * - refs              - if found, increase refs
6143  * - is_root           - if found, set
6144  * - content_checked   - if found, set
6145  * - owner_ref_checked - if found, set
6146  *
6147  * If not found, create a new one, initialize and insert.
6148  */
6149 static int add_extent_rec(struct cache_tree *extent_cache,
6150                 struct extent_record *tmpl)
6151 {
6152         struct extent_record *rec;
6153         struct cache_extent *cache;
6154         int ret = 0;
6155         int dup = 0;
6156
6157         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6158         if (cache) {
6159                 rec = container_of(cache, struct extent_record, cache);
6160                 if (tmpl->refs)
6161                         rec->refs++;
6162                 if (rec->nr == 1)
6163                         rec->nr = max(tmpl->nr, tmpl->max_size);
6164
6165                 /*
6166                  * We need to make sure to reset nr to whatever the extent
6167                  * record says was the real size, this way we can compare it to
6168                  * the backrefs.
6169                  */
6170                 if (tmpl->found_rec) {
6171                         if (tmpl->start != rec->start || rec->found_rec) {
6172                                 struct extent_record *tmp;
6173
6174                                 dup = 1;
6175                                 if (list_empty(&rec->list))
6176                                         list_add_tail(&rec->list,
6177                                                       &duplicate_extents);
6178
6179                                 /*
6180                                  * We have to do this song and dance in case we
6181                                  * find an extent record that falls inside of
6182                                  * our current extent record but does not have
6183                                  * the same objectid.
6184                                  */
6185                                 tmp = malloc(sizeof(*tmp));
6186                                 if (!tmp)
6187                                         return -ENOMEM;
6188                                 tmp->start = tmpl->start;
6189                                 tmp->max_size = tmpl->max_size;
6190                                 tmp->nr = tmpl->nr;
6191                                 tmp->found_rec = 1;
6192                                 tmp->metadata = tmpl->metadata;
6193                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6194                                 INIT_LIST_HEAD(&tmp->list);
6195                                 list_add_tail(&tmp->list, &rec->dups);
6196                                 rec->num_duplicates++;
6197                         } else {
6198                                 rec->nr = tmpl->nr;
6199                                 rec->found_rec = 1;
6200                         }
6201                 }
6202
6203                 if (tmpl->extent_item_refs && !dup) {
6204                         if (rec->extent_item_refs) {
6205                                 fprintf(stderr, "block %llu rec "
6206                                         "extent_item_refs %llu, passed %llu\n",
6207                                         (unsigned long long)tmpl->start,
6208                                         (unsigned long long)
6209                                                         rec->extent_item_refs,
6210                                         (unsigned long long)tmpl->extent_item_refs);
6211                         }
6212                         rec->extent_item_refs = tmpl->extent_item_refs;
6213                 }
6214                 if (tmpl->is_root)
6215                         rec->is_root = 1;
6216                 if (tmpl->content_checked)
6217                         rec->content_checked = 1;
6218                 if (tmpl->owner_ref_checked)
6219                         rec->owner_ref_checked = 1;
6220                 memcpy(&rec->parent_key, &tmpl->parent_key,
6221                                 sizeof(tmpl->parent_key));
6222                 if (tmpl->parent_generation)
6223                         rec->parent_generation = tmpl->parent_generation;
6224                 if (rec->max_size < tmpl->max_size)
6225                         rec->max_size = tmpl->max_size;
6226
6227                 /*
6228                  * A metadata extent can't cross stripe_len boundary, otherwise
6229                  * kernel scrub won't be able to handle it.
6230                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6231                  * it.
6232                  */
6233                 if (tmpl->metadata)
6234                         rec->crossing_stripes = check_crossing_stripes(
6235                                         global_info, rec->start,
6236                                         global_info->nodesize);
6237                 check_extent_type(rec);
6238                 maybe_free_extent_rec(extent_cache, rec);
6239                 return ret;
6240         }
6241
6242         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6243
6244         return ret;
6245 }
6246
6247 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6248                             u64 parent, u64 root, int found_ref)
6249 {
6250         struct extent_record *rec;
6251         struct tree_backref *back;
6252         struct cache_extent *cache;
6253         int ret;
6254
6255         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6256         if (!cache) {
6257                 struct extent_record tmpl;
6258
6259                 memset(&tmpl, 0, sizeof(tmpl));
6260                 tmpl.start = bytenr;
6261                 tmpl.nr = 1;
6262                 tmpl.metadata = 1;
6263                 tmpl.max_size = 1;
6264
6265                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6266                 if (ret)
6267                         return ret;
6268
6269                 /* really a bug in cache_extent implement now */
6270                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6271                 if (!cache)
6272                         return -ENOENT;
6273         }
6274
6275         rec = container_of(cache, struct extent_record, cache);
6276         if (rec->start != bytenr) {
6277                 /*
6278                  * Several cause, from unaligned bytenr to over lapping extents
6279                  */
6280                 return -EEXIST;
6281         }
6282
6283         back = find_tree_backref(rec, parent, root);
6284         if (!back) {
6285                 back = alloc_tree_backref(rec, parent, root);
6286                 if (!back)
6287                         return -ENOMEM;
6288         }
6289
6290         if (found_ref) {
6291                 if (back->node.found_ref) {
6292                         fprintf(stderr, "Extent back ref already exists "
6293                                 "for %llu parent %llu root %llu \n",
6294                                 (unsigned long long)bytenr,
6295                                 (unsigned long long)parent,
6296                                 (unsigned long long)root);
6297                 }
6298                 back->node.found_ref = 1;
6299         } else {
6300                 if (back->node.found_extent_tree) {
6301                         fprintf(stderr, "Extent back ref already exists "
6302                                 "for %llu parent %llu root %llu \n",
6303                                 (unsigned long long)bytenr,
6304                                 (unsigned long long)parent,
6305                                 (unsigned long long)root);
6306                 }
6307                 back->node.found_extent_tree = 1;
6308         }
6309         check_extent_type(rec);
6310         maybe_free_extent_rec(extent_cache, rec);
6311         return 0;
6312 }
6313
6314 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6315                             u64 parent, u64 root, u64 owner, u64 offset,
6316                             u32 num_refs, int found_ref, u64 max_size)
6317 {
6318         struct extent_record *rec;
6319         struct data_backref *back;
6320         struct cache_extent *cache;
6321         int ret;
6322
6323         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6324         if (!cache) {
6325                 struct extent_record tmpl;
6326
6327                 memset(&tmpl, 0, sizeof(tmpl));
6328                 tmpl.start = bytenr;
6329                 tmpl.nr = 1;
6330                 tmpl.max_size = max_size;
6331
6332                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6333                 if (ret)
6334                         return ret;
6335
6336                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6337                 if (!cache)
6338                         abort();
6339         }
6340
6341         rec = container_of(cache, struct extent_record, cache);
6342         if (rec->max_size < max_size)
6343                 rec->max_size = max_size;
6344
6345         /*
6346          * If found_ref is set then max_size is the real size and must match the
6347          * existing refs.  So if we have already found a ref then we need to
6348          * make sure that this ref matches the existing one, otherwise we need
6349          * to add a new backref so we can notice that the backrefs don't match
6350          * and we need to figure out who is telling the truth.  This is to
6351          * account for that awful fsync bug I introduced where we'd end up with
6352          * a btrfs_file_extent_item that would have its length include multiple
6353          * prealloc extents or point inside of a prealloc extent.
6354          */
6355         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6356                                  bytenr, max_size);
6357         if (!back) {
6358                 back = alloc_data_backref(rec, parent, root, owner, offset,
6359                                           max_size);
6360                 BUG_ON(!back);
6361         }
6362
6363         if (found_ref) {
6364                 BUG_ON(num_refs != 1);
6365                 if (back->node.found_ref)
6366                         BUG_ON(back->bytes != max_size);
6367                 back->node.found_ref = 1;
6368                 back->found_ref += 1;
6369                 back->bytes = max_size;
6370                 back->disk_bytenr = bytenr;
6371                 rec->refs += 1;
6372                 rec->content_checked = 1;
6373                 rec->owner_ref_checked = 1;
6374         } else {
6375                 if (back->node.found_extent_tree) {
6376                         fprintf(stderr, "Extent back ref already exists "
6377                                 "for %llu parent %llu root %llu "
6378                                 "owner %llu offset %llu num_refs %lu\n",
6379                                 (unsigned long long)bytenr,
6380                                 (unsigned long long)parent,
6381                                 (unsigned long long)root,
6382                                 (unsigned long long)owner,
6383                                 (unsigned long long)offset,
6384                                 (unsigned long)num_refs);
6385                 }
6386                 back->num_refs = num_refs;
6387                 back->node.found_extent_tree = 1;
6388         }
6389         maybe_free_extent_rec(extent_cache, rec);
6390         return 0;
6391 }
6392
6393 static int add_pending(struct cache_tree *pending,
6394                        struct cache_tree *seen, u64 bytenr, u32 size)
6395 {
6396         int ret;
6397         ret = add_cache_extent(seen, bytenr, size);
6398         if (ret)
6399                 return ret;
6400         add_cache_extent(pending, bytenr, size);
6401         return 0;
6402 }
6403
6404 static int pick_next_pending(struct cache_tree *pending,
6405                         struct cache_tree *reada,
6406                         struct cache_tree *nodes,
6407                         u64 last, struct block_info *bits, int bits_nr,
6408                         int *reada_bits)
6409 {
6410         unsigned long node_start = last;
6411         struct cache_extent *cache;
6412         int ret;
6413
6414         cache = search_cache_extent(reada, 0);
6415         if (cache) {
6416                 bits[0].start = cache->start;
6417                 bits[0].size = cache->size;
6418                 *reada_bits = 1;
6419                 return 1;
6420         }
6421         *reada_bits = 0;
6422         if (node_start > 32768)
6423                 node_start -= 32768;
6424
6425         cache = search_cache_extent(nodes, node_start);
6426         if (!cache)
6427                 cache = search_cache_extent(nodes, 0);
6428
6429         if (!cache) {
6430                  cache = search_cache_extent(pending, 0);
6431                  if (!cache)
6432                          return 0;
6433                  ret = 0;
6434                  do {
6435                          bits[ret].start = cache->start;
6436                          bits[ret].size = cache->size;
6437                          cache = next_cache_extent(cache);
6438                          ret++;
6439                  } while (cache && ret < bits_nr);
6440                  return ret;
6441         }
6442
6443         ret = 0;
6444         do {
6445                 bits[ret].start = cache->start;
6446                 bits[ret].size = cache->size;
6447                 cache = next_cache_extent(cache);
6448                 ret++;
6449         } while (cache && ret < bits_nr);
6450
6451         if (bits_nr - ret > 8) {
6452                 u64 lookup = bits[0].start + bits[0].size;
6453                 struct cache_extent *next;
6454                 next = search_cache_extent(pending, lookup);
6455                 while(next) {
6456                         if (next->start - lookup > 32768)
6457                                 break;
6458                         bits[ret].start = next->start;
6459                         bits[ret].size = next->size;
6460                         lookup = next->start + next->size;
6461                         ret++;
6462                         if (ret == bits_nr)
6463                                 break;
6464                         next = next_cache_extent(next);
6465                         if (!next)
6466                                 break;
6467                 }
6468         }
6469         return ret;
6470 }
6471
6472 static void free_chunk_record(struct cache_extent *cache)
6473 {
6474         struct chunk_record *rec;
6475
6476         rec = container_of(cache, struct chunk_record, cache);
6477         list_del_init(&rec->list);
6478         list_del_init(&rec->dextents);
6479         free(rec);
6480 }
6481
6482 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6483 {
6484         cache_tree_free_extents(chunk_cache, free_chunk_record);
6485 }
6486
6487 static void free_device_record(struct rb_node *node)
6488 {
6489         struct device_record *rec;
6490
6491         rec = container_of(node, struct device_record, node);
6492         free(rec);
6493 }
6494
6495 FREE_RB_BASED_TREE(device_cache, free_device_record);
6496
6497 int insert_block_group_record(struct block_group_tree *tree,
6498                               struct block_group_record *bg_rec)
6499 {
6500         int ret;
6501
6502         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6503         if (ret)
6504                 return ret;
6505
6506         list_add_tail(&bg_rec->list, &tree->block_groups);
6507         return 0;
6508 }
6509
6510 static void free_block_group_record(struct cache_extent *cache)
6511 {
6512         struct block_group_record *rec;
6513
6514         rec = container_of(cache, struct block_group_record, cache);
6515         list_del_init(&rec->list);
6516         free(rec);
6517 }
6518
6519 void free_block_group_tree(struct block_group_tree *tree)
6520 {
6521         cache_tree_free_extents(&tree->tree, free_block_group_record);
6522 }
6523
6524 int insert_device_extent_record(struct device_extent_tree *tree,
6525                                 struct device_extent_record *de_rec)
6526 {
6527         int ret;
6528
6529         /*
6530          * Device extent is a bit different from the other extents, because
6531          * the extents which belong to the different devices may have the
6532          * same start and size, so we need use the special extent cache
6533          * search/insert functions.
6534          */
6535         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6536         if (ret)
6537                 return ret;
6538
6539         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6540         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6541         return 0;
6542 }
6543
6544 static void free_device_extent_record(struct cache_extent *cache)
6545 {
6546         struct device_extent_record *rec;
6547
6548         rec = container_of(cache, struct device_extent_record, cache);
6549         if (!list_empty(&rec->chunk_list))
6550                 list_del_init(&rec->chunk_list);
6551         if (!list_empty(&rec->device_list))
6552                 list_del_init(&rec->device_list);
6553         free(rec);
6554 }
6555
6556 void free_device_extent_tree(struct device_extent_tree *tree)
6557 {
6558         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6559 }
6560
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found in @slot of @leaf.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs, all others as data backrefs carrying the v0 ref count.
 * The item key supplies the extent bytenr (objectid) and parent (offset).
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 *
 * NOTE(review): the data path passes max_size 0, while
 * add_extent_rec_nolookup() asserts max_size != 0 when the extent is not
 * cached yet - confirm this cannot be reached with an uncached extent.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
                return add_tree_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0);

        return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0),
                                0, 0);
}
#endif
6581
6582 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6583                                             struct btrfs_key *key,
6584                                             int slot)
6585 {
6586         struct btrfs_chunk *ptr;
6587         struct chunk_record *rec;
6588         int num_stripes, i;
6589
6590         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6591         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6592
6593         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6594         if (!rec) {
6595                 fprintf(stderr, "memory allocation failed\n");
6596                 exit(-1);
6597         }
6598
6599         INIT_LIST_HEAD(&rec->list);
6600         INIT_LIST_HEAD(&rec->dextents);
6601         rec->bg_rec = NULL;
6602
6603         rec->cache.start = key->offset;
6604         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6605
6606         rec->generation = btrfs_header_generation(leaf);
6607
6608         rec->objectid = key->objectid;
6609         rec->type = key->type;
6610         rec->offset = key->offset;
6611
6612         rec->length = rec->cache.size;
6613         rec->owner = btrfs_chunk_owner(leaf, ptr);
6614         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6615         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6616         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6617         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6618         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6619         rec->num_stripes = num_stripes;
6620         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6621
6622         for (i = 0; i < rec->num_stripes; ++i) {
6623                 rec->stripes[i].devid =
6624                         btrfs_stripe_devid_nr(leaf, ptr, i);
6625                 rec->stripes[i].offset =
6626                         btrfs_stripe_offset_nr(leaf, ptr, i);
6627                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6628                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6629                                 BTRFS_UUID_SIZE);
6630         }
6631
6632         return rec;
6633 }
6634
6635 static int process_chunk_item(struct cache_tree *chunk_cache,
6636                               struct btrfs_key *key, struct extent_buffer *eb,
6637                               int slot)
6638 {
6639         struct chunk_record *rec;
6640         struct btrfs_chunk *chunk;
6641         int ret = 0;
6642
6643         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6644         /*
6645          * Do extra check for this chunk item,
6646          *
6647          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6648          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6649          * and owner<->key_type check.
6650          */
6651         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6652                                       key->offset);
6653         if (ret < 0) {
6654                 error("chunk(%llu, %llu) is not valid, ignore it",
6655                       key->offset, btrfs_chunk_length(eb, chunk));
6656                 return 0;
6657         }
6658         rec = btrfs_new_chunk_record(eb, key, slot);
6659         ret = insert_cache_extent(chunk_cache, &rec->cache);
6660         if (ret) {
6661                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6662                         rec->offset, rec->length);
6663                 free(rec);
6664         }
6665
6666         return ret;
6667 }
6668
6669 static int process_device_item(struct rb_root *dev_cache,
6670                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6671 {
6672         struct btrfs_dev_item *ptr;
6673         struct device_record *rec;
6674         int ret = 0;
6675
6676         ptr = btrfs_item_ptr(eb,
6677                 slot, struct btrfs_dev_item);
6678
6679         rec = malloc(sizeof(*rec));
6680         if (!rec) {
6681                 fprintf(stderr, "memory allocation failed\n");
6682                 return -ENOMEM;
6683         }
6684
6685         rec->devid = key->offset;
6686         rec->generation = btrfs_header_generation(eb);
6687
6688         rec->objectid = key->objectid;
6689         rec->type = key->type;
6690         rec->offset = key->offset;
6691
6692         rec->devid = btrfs_device_id(eb, ptr);
6693         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6694         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6695
6696         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6697         if (ret) {
6698                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6699                 free(rec);
6700         }
6701
6702         return ret;
6703 }
6704
6705 struct block_group_record *
6706 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6707                              int slot)
6708 {
6709         struct btrfs_block_group_item *ptr;
6710         struct block_group_record *rec;
6711
6712         rec = calloc(1, sizeof(*rec));
6713         if (!rec) {
6714                 fprintf(stderr, "memory allocation failed\n");
6715                 exit(-1);
6716         }
6717
6718         rec->cache.start = key->objectid;
6719         rec->cache.size = key->offset;
6720
6721         rec->generation = btrfs_header_generation(leaf);
6722
6723         rec->objectid = key->objectid;
6724         rec->type = key->type;
6725         rec->offset = key->offset;
6726
6727         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6728         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6729
6730         INIT_LIST_HEAD(&rec->list);
6731
6732         return rec;
6733 }
6734
6735 static int process_block_group_item(struct block_group_tree *block_group_cache,
6736                                     struct btrfs_key *key,
6737                                     struct extent_buffer *eb, int slot)
6738 {
6739         struct block_group_record *rec;
6740         int ret = 0;
6741
6742         rec = btrfs_new_block_group_record(eb, key, slot);
6743         ret = insert_block_group_record(block_group_cache, rec);
6744         if (ret) {
6745                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6746                         rec->objectid, rec->offset);
6747                 free(rec);
6748         }
6749
6750         return ret;
6751 }
6752
6753 struct device_extent_record *
6754 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6755                                struct btrfs_key *key, int slot)
6756 {
6757         struct device_extent_record *rec;
6758         struct btrfs_dev_extent *ptr;
6759
6760         rec = calloc(1, sizeof(*rec));
6761         if (!rec) {
6762                 fprintf(stderr, "memory allocation failed\n");
6763                 exit(-1);
6764         }
6765
6766         rec->cache.objectid = key->objectid;
6767         rec->cache.start = key->offset;
6768
6769         rec->generation = btrfs_header_generation(leaf);
6770
6771         rec->objectid = key->objectid;
6772         rec->type = key->type;
6773         rec->offset = key->offset;
6774
6775         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6776         rec->chunk_objecteid =
6777                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6778         rec->chunk_offset =
6779                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6780         rec->length = btrfs_dev_extent_length(leaf, ptr);
6781         rec->cache.size = rec->length;
6782
6783         INIT_LIST_HEAD(&rec->chunk_list);
6784         INIT_LIST_HEAD(&rec->device_list);
6785
6786         return rec;
6787 }
6788
6789 static int
6790 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6791                            struct btrfs_key *key, struct extent_buffer *eb,
6792                            int slot)
6793 {
6794         struct device_extent_record *rec;
6795         int ret;
6796
6797         rec = btrfs_new_device_extent_record(eb, key, slot);
6798         ret = insert_device_extent_record(dev_extent_cache, rec);
6799         if (ret) {
6800                 fprintf(stderr,
6801                         "Device extent[%llu, %llu, %llu] existed.\n",
6802                         rec->objectid, rec->offset, rec->length);
6803                 free(rec);
6804         }
6805
6806         return ret;
6807 }
6808
6809 static int process_extent_item(struct btrfs_root *root,
6810                                struct cache_tree *extent_cache,
6811                                struct extent_buffer *eb, int slot)
6812 {
6813         struct btrfs_extent_item *ei;
6814         struct btrfs_extent_inline_ref *iref;
6815         struct btrfs_extent_data_ref *dref;
6816         struct btrfs_shared_data_ref *sref;
6817         struct btrfs_key key;
6818         struct extent_record tmpl;
6819         unsigned long end;
6820         unsigned long ptr;
6821         int ret;
6822         int type;
6823         u32 item_size = btrfs_item_size_nr(eb, slot);
6824         u64 refs = 0;
6825         u64 offset;
6826         u64 num_bytes;
6827         int metadata = 0;
6828
6829         btrfs_item_key_to_cpu(eb, &key, slot);
6830
6831         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6832                 metadata = 1;
6833                 num_bytes = root->fs_info->nodesize;
6834         } else {
6835                 num_bytes = key.offset;
6836         }
6837
6838         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6839                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6840                       key.objectid, root->fs_info->sectorsize);
6841                 return -EIO;
6842         }
6843         if (item_size < sizeof(*ei)) {
6844 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6845                 struct btrfs_extent_item_v0 *ei0;
6846                 BUG_ON(item_size != sizeof(*ei0));
6847                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6848                 refs = btrfs_extent_refs_v0(eb, ei0);
6849 #else
6850                 BUG();
6851 #endif
6852                 memset(&tmpl, 0, sizeof(tmpl));
6853                 tmpl.start = key.objectid;
6854                 tmpl.nr = num_bytes;
6855                 tmpl.extent_item_refs = refs;
6856                 tmpl.metadata = metadata;
6857                 tmpl.found_rec = 1;
6858                 tmpl.max_size = num_bytes;
6859
6860                 return add_extent_rec(extent_cache, &tmpl);
6861         }
6862
6863         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6864         refs = btrfs_extent_refs(eb, ei);
6865         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6866                 metadata = 1;
6867         else
6868                 metadata = 0;
6869         if (metadata && num_bytes != root->fs_info->nodesize) {
6870                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6871                       num_bytes, root->fs_info->nodesize);
6872                 return -EIO;
6873         }
6874         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6875                 error("ignore invalid data extent, length %llu is not aligned to %u",
6876                       num_bytes, root->fs_info->sectorsize);
6877                 return -EIO;
6878         }
6879
6880         memset(&tmpl, 0, sizeof(tmpl));
6881         tmpl.start = key.objectid;
6882         tmpl.nr = num_bytes;
6883         tmpl.extent_item_refs = refs;
6884         tmpl.metadata = metadata;
6885         tmpl.found_rec = 1;
6886         tmpl.max_size = num_bytes;
6887         add_extent_rec(extent_cache, &tmpl);
6888
6889         ptr = (unsigned long)(ei + 1);
6890         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6891             key.type == BTRFS_EXTENT_ITEM_KEY)
6892                 ptr += sizeof(struct btrfs_tree_block_info);
6893
6894         end = (unsigned long)ei + item_size;
6895         while (ptr < end) {
6896                 iref = (struct btrfs_extent_inline_ref *)ptr;
6897                 type = btrfs_extent_inline_ref_type(eb, iref);
6898                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6899                 switch (type) {
6900                 case BTRFS_TREE_BLOCK_REF_KEY:
6901                         ret = add_tree_backref(extent_cache, key.objectid,
6902                                         0, offset, 0);
6903                         if (ret < 0)
6904                                 error(
6905                         "add_tree_backref failed (extent items tree block): %s",
6906                                       strerror(-ret));
6907                         break;
6908                 case BTRFS_SHARED_BLOCK_REF_KEY:
6909                         ret = add_tree_backref(extent_cache, key.objectid,
6910                                         offset, 0, 0);
6911                         if (ret < 0)
6912                                 error(
6913                         "add_tree_backref failed (extent items shared block): %s",
6914                                       strerror(-ret));
6915                         break;
6916                 case BTRFS_EXTENT_DATA_REF_KEY:
6917                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6918                         add_data_backref(extent_cache, key.objectid, 0,
6919                                         btrfs_extent_data_ref_root(eb, dref),
6920                                         btrfs_extent_data_ref_objectid(eb,
6921                                                                        dref),
6922                                         btrfs_extent_data_ref_offset(eb, dref),
6923                                         btrfs_extent_data_ref_count(eb, dref),
6924                                         0, num_bytes);
6925                         break;
6926                 case BTRFS_SHARED_DATA_REF_KEY:
6927                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6928                         add_data_backref(extent_cache, key.objectid, offset,
6929                                         0, 0, 0,
6930                                         btrfs_shared_data_ref_count(eb, sref),
6931                                         0, num_bytes);
6932                         break;
6933                 default:
6934                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6935                                 key.objectid, key.type, num_bytes);
6936                         goto out;
6937                 }
6938                 ptr += btrfs_extent_inline_ref_size(type);
6939         }
6940         WARN_ON(ptr > end);
6941 out:
6942         return 0;
6943 }
6944
6945 static int check_cache_range(struct btrfs_root *root,
6946                              struct btrfs_block_group_cache *cache,
6947                              u64 offset, u64 bytes)
6948 {
6949         struct btrfs_free_space *entry;
6950         u64 *logical;
6951         u64 bytenr;
6952         int stripe_len;
6953         int i, nr, ret;
6954
6955         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6956                 bytenr = btrfs_sb_offset(i);
6957                 ret = btrfs_rmap_block(root->fs_info,
6958                                        cache->key.objectid, bytenr, 0,
6959                                        &logical, &nr, &stripe_len);
6960                 if (ret)
6961                         return ret;
6962
6963                 while (nr--) {
6964                         if (logical[nr] + stripe_len <= offset)
6965                                 continue;
6966                         if (offset + bytes <= logical[nr])
6967                                 continue;
6968                         if (logical[nr] == offset) {
6969                                 if (stripe_len >= bytes) {
6970                                         free(logical);
6971                                         return 0;
6972                                 }
6973                                 bytes -= stripe_len;
6974                                 offset += stripe_len;
6975                         } else if (logical[nr] < offset) {
6976                                 if (logical[nr] + stripe_len >=
6977                                     offset + bytes) {
6978                                         free(logical);
6979                                         return 0;
6980                                 }
6981                                 bytes = (offset + bytes) -
6982                                         (logical[nr] + stripe_len);
6983                                 offset = logical[nr] + stripe_len;
6984                         } else {
6985                                 /*
6986                                  * Could be tricky, the super may land in the
6987                                  * middle of the area we're checking.  First
6988                                  * check the easiest case, it's at the end.
6989                                  */
6990                                 if (logical[nr] + stripe_len >=
6991                                     bytes + offset) {
6992                                         bytes = logical[nr] - offset;
6993                                         continue;
6994                                 }
6995
6996                                 /* Check the left side */
6997                                 ret = check_cache_range(root, cache,
6998                                                         offset,
6999                                                         logical[nr] - offset);
7000                                 if (ret) {
7001                                         free(logical);
7002                                         return ret;
7003                                 }
7004
7005                                 /* Now we continue with the right side */
7006                                 bytes = (offset + bytes) -
7007                                         (logical[nr] + stripe_len);
7008                                 offset = logical[nr] + stripe_len;
7009                         }
7010                 }
7011
7012                 free(logical);
7013         }
7014
7015         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7016         if (!entry) {
7017                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7018                         offset, offset+bytes);
7019                 return -EINVAL;
7020         }
7021
7022         if (entry->offset != offset) {
7023                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7024                         entry->offset);
7025                 return -EINVAL;
7026         }
7027
7028         if (entry->bytes != bytes) {
7029                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7030                         bytes, entry->bytes, offset);
7031                 return -EINVAL;
7032         }
7033
7034         unlink_free_space(cache->free_space_ctl, entry);
7035         free(entry);
7036         return 0;
7037 }
7038
7039 static int verify_space_cache(struct btrfs_root *root,
7040                               struct btrfs_block_group_cache *cache)
7041 {
7042         struct btrfs_path path;
7043         struct extent_buffer *leaf;
7044         struct btrfs_key key;
7045         u64 last;
7046         int ret = 0;
7047
7048         root = root->fs_info->extent_root;
7049
7050         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7051
7052         btrfs_init_path(&path);
7053         key.objectid = last;
7054         key.offset = 0;
7055         key.type = BTRFS_EXTENT_ITEM_KEY;
7056         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7057         if (ret < 0)
7058                 goto out;
7059         ret = 0;
7060         while (1) {
7061                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7062                         ret = btrfs_next_leaf(root, &path);
7063                         if (ret < 0)
7064                                 goto out;
7065                         if (ret > 0) {
7066                                 ret = 0;
7067                                 break;
7068                         }
7069                 }
7070                 leaf = path.nodes[0];
7071                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7072                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7073                         break;
7074                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7075                     key.type != BTRFS_METADATA_ITEM_KEY) {
7076                         path.slots[0]++;
7077                         continue;
7078                 }
7079
7080                 if (last == key.objectid) {
7081                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7082                                 last = key.objectid + key.offset;
7083                         else
7084                                 last = key.objectid + root->fs_info->nodesize;
7085                         path.slots[0]++;
7086                         continue;
7087                 }
7088
7089                 ret = check_cache_range(root, cache, last,
7090                                         key.objectid - last);
7091                 if (ret)
7092                         break;
7093                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7094                         last = key.objectid + key.offset;
7095                 else
7096                         last = key.objectid + root->fs_info->nodesize;
7097                 path.slots[0]++;
7098         }
7099
7100         if (last < cache->key.objectid + cache->key.offset)
7101                 ret = check_cache_range(root, cache, last,
7102                                         cache->key.objectid +
7103                                         cache->key.offset - last);
7104
7105 out:
7106         btrfs_release_path(&path);
7107
7108         if (!ret &&
7109             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7110                 fprintf(stderr, "There are still entries left in the space "
7111                         "cache\n");
7112                 ret = -EINVAL;
7113         }
7114
7115         return ret;
7116 }
7117
7118 static int check_space_cache(struct btrfs_root *root)
7119 {
7120         struct btrfs_block_group_cache *cache;
7121         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7122         int ret;
7123         int error = 0;
7124
7125         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7126             btrfs_super_generation(root->fs_info->super_copy) !=
7127             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7128                 printf("cache and super generation don't match, space cache "
7129                        "will be invalidated\n");
7130                 return 0;
7131         }
7132
7133         if (ctx.progress_enabled) {
7134                 ctx.tp = TASK_FREE_SPACE;
7135                 task_start(ctx.info);
7136         }
7137
7138         while (1) {
7139                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7140                 if (!cache)
7141                         break;
7142
7143                 start = cache->key.objectid + cache->key.offset;
7144                 if (!cache->free_space_ctl) {
7145                         if (btrfs_init_free_space_ctl(cache,
7146                                                 root->fs_info->sectorsize)) {
7147                                 ret = -ENOMEM;
7148                                 break;
7149                         }
7150                 } else {
7151                         btrfs_remove_free_space_cache(cache);
7152                 }
7153
7154                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7155                         ret = exclude_super_stripes(root, cache);
7156                         if (ret) {
7157                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7158                                         strerror(-ret));
7159                                 error++;
7160                                 continue;
7161                         }
7162                         ret = load_free_space_tree(root->fs_info, cache);
7163                         free_excluded_extents(root, cache);
7164                         if (ret < 0) {
7165                                 fprintf(stderr, "could not load free space tree: %s\n",
7166                                         strerror(-ret));
7167                                 error++;
7168                                 continue;
7169                         }
7170                         error += ret;
7171                 } else {
7172                         ret = load_free_space_cache(root->fs_info, cache);
7173                         if (!ret)
7174                                 continue;
7175                 }
7176
7177                 ret = verify_space_cache(root, cache);
7178                 if (ret) {
7179                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7180                                 cache->key.objectid);
7181                         error++;
7182                 }
7183         }
7184
7185         task_stop(ctx.info);
7186
7187         return error ? -EINVAL : 0;
7188 }
7189
7190 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7191                         u64 num_bytes, unsigned long leaf_offset,
7192                         struct extent_buffer *eb) {
7193
7194         struct btrfs_fs_info *fs_info = root->fs_info;
7195         u64 offset = 0;
7196         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7197         char *data;
7198         unsigned long csum_offset;
7199         u32 csum;
7200         u32 csum_expected;
7201         u64 read_len;
7202         u64 data_checked = 0;
7203         u64 tmp;
7204         int ret = 0;
7205         int mirror;
7206         int num_copies;
7207
7208         if (num_bytes % fs_info->sectorsize)
7209                 return -EINVAL;
7210
7211         data = malloc(num_bytes);
7212         if (!data)
7213                 return -ENOMEM;
7214
7215         while (offset < num_bytes) {
7216                 mirror = 0;
7217 again:
7218                 read_len = num_bytes - offset;
7219                 /* read as much space once a time */
7220                 ret = read_extent_data(fs_info, data + offset,
7221                                 bytenr + offset, &read_len, mirror);
7222                 if (ret)
7223                         goto out;
7224                 data_checked = 0;
7225                 /* verify every 4k data's checksum */
7226                 while (data_checked < read_len) {
7227                         csum = ~(u32)0;
7228                         tmp = offset + data_checked;
7229
7230                         csum = btrfs_csum_data((char *)data + tmp,
7231                                                csum, fs_info->sectorsize);
7232                         btrfs_csum_final(csum, (u8 *)&csum);
7233
7234                         csum_offset = leaf_offset +
7235                                  tmp / fs_info->sectorsize * csum_size;
7236                         read_extent_buffer(eb, (char *)&csum_expected,
7237                                            csum_offset, csum_size);
7238                         /* try another mirror */
7239                         if (csum != csum_expected) {
7240                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7241                                                 mirror, bytenr + tmp,
7242                                                 csum, csum_expected);
7243                                 num_copies = btrfs_num_copies(root->fs_info,
7244                                                 bytenr, num_bytes);
7245                                 if (mirror < num_copies - 1) {
7246                                         mirror += 1;
7247                                         goto again;
7248                                 }
7249                         }
7250                         data_checked += fs_info->sectorsize;
7251                 }
7252                 offset += read_len;
7253         }
7254 out:
7255         free(data);
7256         return ret;
7257 }
7258
7259 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7260                                u64 num_bytes)
7261 {
7262         struct btrfs_path path;
7263         struct extent_buffer *leaf;
7264         struct btrfs_key key;
7265         int ret;
7266
7267         btrfs_init_path(&path);
7268         key.objectid = bytenr;
7269         key.type = BTRFS_EXTENT_ITEM_KEY;
7270         key.offset = (u64)-1;
7271
7272 again:
7273         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7274                                 0, 0);
7275         if (ret < 0) {
7276                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7277                 btrfs_release_path(&path);
7278                 return ret;
7279         } else if (ret) {
7280                 if (path.slots[0] > 0) {
7281                         path.slots[0]--;
7282                 } else {
7283                         ret = btrfs_prev_leaf(root, &path);
7284                         if (ret < 0) {
7285                                 goto out;
7286                         } else if (ret > 0) {
7287                                 ret = 0;
7288                                 goto out;
7289                         }
7290                 }
7291         }
7292
7293         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7294
7295         /*
7296          * Block group items come before extent items if they have the same
7297          * bytenr, so walk back one more just in case.  Dear future traveller,
7298          * first congrats on mastering time travel.  Now if it's not too much
7299          * trouble could you go back to 2006 and tell Chris to make the
7300          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7301          * EXTENT_ITEM_KEY please?
7302          */
7303         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7304                 if (path.slots[0] > 0) {
7305                         path.slots[0]--;
7306                 } else {
7307                         ret = btrfs_prev_leaf(root, &path);
7308                         if (ret < 0) {
7309                                 goto out;
7310                         } else if (ret > 0) {
7311                                 ret = 0;
7312                                 goto out;
7313                         }
7314                 }
7315                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7316         }
7317
7318         while (num_bytes) {
7319                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7320                         ret = btrfs_next_leaf(root, &path);
7321                         if (ret < 0) {
7322                                 fprintf(stderr, "Error going to next leaf "
7323                                         "%d\n", ret);
7324                                 btrfs_release_path(&path);
7325                                 return ret;
7326                         } else if (ret) {
7327                                 break;
7328                         }
7329                 }
7330                 leaf = path.nodes[0];
7331                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7332                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7333                         path.slots[0]++;
7334                         continue;
7335                 }
7336                 if (key.objectid + key.offset < bytenr) {
7337                         path.slots[0]++;
7338                         continue;
7339                 }
7340                 if (key.objectid > bytenr + num_bytes)
7341                         break;
7342
7343                 if (key.objectid == bytenr) {
7344                         if (key.offset >= num_bytes) {
7345                                 num_bytes = 0;
7346                                 break;
7347                         }
7348                         num_bytes -= key.offset;
7349                         bytenr += key.offset;
7350                 } else if (key.objectid < bytenr) {
7351                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7352                                 num_bytes = 0;
7353                                 break;
7354                         }
7355                         num_bytes = (bytenr + num_bytes) -
7356                                 (key.objectid + key.offset);
7357                         bytenr = key.objectid + key.offset;
7358                 } else {
7359                         if (key.objectid + key.offset < bytenr + num_bytes) {
7360                                 u64 new_start = key.objectid + key.offset;
7361                                 u64 new_bytes = bytenr + num_bytes - new_start;
7362
7363                                 /*
7364                                  * Weird case, the extent is in the middle of
7365                                  * our range, we'll have to search one side
7366                                  * and then the other.  Not sure if this happens
7367                                  * in real life, but no harm in coding it up
7368                                  * anyway just in case.
7369                                  */
7370                                 btrfs_release_path(&path);
7371                                 ret = check_extent_exists(root, new_start,
7372                                                           new_bytes);
7373                                 if (ret) {
7374                                         fprintf(stderr, "Right section didn't "
7375                                                 "have a record\n");
7376                                         break;
7377                                 }
7378                                 num_bytes = key.objectid - bytenr;
7379                                 goto again;
7380                         }
7381                         num_bytes = key.objectid - bytenr;
7382                 }
7383                 path.slots[0]++;
7384         }
7385         ret = 0;
7386
7387 out:
7388         if (num_bytes && !ret) {
7389                 fprintf(stderr, "There are no extents for csum range "
7390                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7391                 ret = 1;
7392         }
7393
7394         btrfs_release_path(&path);
7395         return ret;
7396 }
7397
7398 static int check_csums(struct btrfs_root *root)
7399 {
7400         struct btrfs_path path;
7401         struct extent_buffer *leaf;
7402         struct btrfs_key key;
7403         u64 offset = 0, num_bytes = 0;
7404         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7405         int errors = 0;
7406         int ret;
7407         u64 data_len;
7408         unsigned long leaf_offset;
7409
7410         root = root->fs_info->csum_root;
7411         if (!extent_buffer_uptodate(root->node)) {
7412                 fprintf(stderr, "No valid csum tree found\n");
7413                 return -ENOENT;
7414         }
7415
7416         btrfs_init_path(&path);
7417         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7418         key.type = BTRFS_EXTENT_CSUM_KEY;
7419         key.offset = 0;
7420         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7421         if (ret < 0) {
7422                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7423                 btrfs_release_path(&path);
7424                 return ret;
7425         }
7426
7427         if (ret > 0 && path.slots[0])
7428                 path.slots[0]--;
7429         ret = 0;
7430
7431         while (1) {
7432                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7433                         ret = btrfs_next_leaf(root, &path);
7434                         if (ret < 0) {
7435                                 fprintf(stderr, "Error going to next leaf "
7436                                         "%d\n", ret);
7437                                 break;
7438                         }
7439                         if (ret)
7440                                 break;
7441                 }
7442                 leaf = path.nodes[0];
7443
7444                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7445                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7446                         path.slots[0]++;
7447                         continue;
7448                 }
7449
7450                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7451                               csum_size) * root->fs_info->sectorsize;
7452                 if (!check_data_csum)
7453                         goto skip_csum_check;
7454                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7455                 ret = check_extent_csums(root, key.offset, data_len,
7456                                          leaf_offset, leaf);
7457                 if (ret)
7458                         break;
7459 skip_csum_check:
7460                 if (!num_bytes) {
7461                         offset = key.offset;
7462                 } else if (key.offset != offset + num_bytes) {
7463                         ret = check_extent_exists(root, offset, num_bytes);
7464                         if (ret) {
7465                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7466                                         "there is no extent record\n",
7467                                         offset, offset+num_bytes);
7468                                 errors++;
7469                         }
7470                         offset = key.offset;
7471                         num_bytes = 0;
7472                 }
7473                 num_bytes += data_len;
7474                 path.slots[0]++;
7475         }
7476
7477         btrfs_release_path(&path);
7478         return errors;
7479 }
7480
7481 static int is_dropped_key(struct btrfs_key *key,
7482                           struct btrfs_key *drop_key) {
7483         if (key->objectid < drop_key->objectid)
7484                 return 1;
7485         else if (key->objectid == drop_key->objectid) {
7486                 if (key->type < drop_key->type)
7487                         return 1;
7488                 else if (key->type == drop_key->type) {
7489                         if (key->offset < drop_key->offset)
7490                                 return 1;
7491                 }
7492         }
7493         return 0;
7494 }
7495
7496 /*
7497  * Here are the rules for FULL_BACKREF.
7498  *
7499  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7500  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7501  *      FULL_BACKREF set.
7502  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7503  *    if it happened after the relocation occurred since we'll have dropped the
7504  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7505  *    have no real way to know for sure.
7506  *
7507  * We process the blocks one root at a time, and we start from the lowest root
7508  * objectid and go to the highest.  So we can just lookup the owner backref for
7509  * the record and if we don't find it then we know it doesn't exist and we have
7510  * a FULL BACKREF.
7511  *
7512  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7513  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7514  * be set or not and then we can check later once we've gathered all the refs.
7515  */
7516 static int calc_extent_flag(struct cache_tree *extent_cache,
7517                            struct extent_buffer *buf,
7518                            struct root_item_record *ri,
7519                            u64 *flags)
7520 {
7521         struct extent_record *rec;
7522         struct cache_extent *cache;
7523         struct tree_backref *tback;
7524         u64 owner = 0;
7525
7526         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7527         /* we have added this extent before */
7528         if (!cache)
7529                 return -ENOENT;
7530
7531         rec = container_of(cache, struct extent_record, cache);
7532
7533         /*
7534          * Except file/reloc tree, we can not have
7535          * FULL BACKREF MODE
7536          */
7537         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7538                 goto normal;
7539         /*
7540          * root node
7541          */
7542         if (buf->start == ri->bytenr)
7543                 goto normal;
7544
7545         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7546                 goto full_backref;
7547
7548         owner = btrfs_header_owner(buf);
7549         if (owner == ri->objectid)
7550                 goto normal;
7551
7552         tback = find_tree_backref(rec, 0, owner);
7553         if (!tback)
7554                 goto full_backref;
7555 normal:
7556         *flags = 0;
7557         if (rec->flag_block_full_backref != FLAG_UNSET &&
7558             rec->flag_block_full_backref != 0)
7559                 rec->bad_full_backref = 1;
7560         return 0;
7561 full_backref:
7562         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7563         if (rec->flag_block_full_backref != FLAG_UNSET &&
7564             rec->flag_block_full_backref != 1)
7565                 rec->bad_full_backref = 1;
7566         return 0;
7567 }
7568
7569 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7570 {
7571         fprintf(stderr, "Invalid key type(");
7572         print_key_type(stderr, 0, key_type);
7573         fprintf(stderr, ") found in root(");
7574         print_objectid(stderr, rootid, 0);
7575         fprintf(stderr, ")\n");
7576 }
7577
7578 /*
7579  * Check if the key is valid with its extent buffer.
7580  *
7581  * This is a early check in case invalid key exists in a extent buffer
7582  * This is not comprehensive yet, but should prevent wrong key/item passed
7583  * further
7584  */
7585 static int check_type_with_root(u64 rootid, u8 key_type)
7586 {
7587         switch (key_type) {
7588         /* Only valid in chunk tree */
7589         case BTRFS_DEV_ITEM_KEY:
7590         case BTRFS_CHUNK_ITEM_KEY:
7591                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7592                         goto err;
7593                 break;
7594         /* valid in csum and log tree */
7595         case BTRFS_CSUM_TREE_OBJECTID:
7596                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7597                       is_fstree(rootid)))
7598                         goto err;
7599                 break;
7600         case BTRFS_EXTENT_ITEM_KEY:
7601         case BTRFS_METADATA_ITEM_KEY:
7602         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7603                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7604                         goto err;
7605                 break;
7606         case BTRFS_ROOT_ITEM_KEY:
7607                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7608                         goto err;
7609                 break;
7610         case BTRFS_DEV_EXTENT_KEY:
7611                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7612                         goto err;
7613                 break;
7614         }
7615         return 0;
7616 err:
7617         report_mismatch_key_root(key_type, rootid);
7618         return -EINVAL;
7619 }
7620
7621 static int run_next_block(struct btrfs_root *root,
7622                           struct block_info *bits,
7623                           int bits_nr,
7624                           u64 *last,
7625                           struct cache_tree *pending,
7626                           struct cache_tree *seen,
7627                           struct cache_tree *reada,
7628                           struct cache_tree *nodes,
7629                           struct cache_tree *extent_cache,
7630                           struct cache_tree *chunk_cache,
7631                           struct rb_root *dev_cache,
7632                           struct block_group_tree *block_group_cache,
7633                           struct device_extent_tree *dev_extent_cache,
7634                           struct root_item_record *ri)
7635 {
7636         struct btrfs_fs_info *fs_info = root->fs_info;
7637         struct extent_buffer *buf;
7638         struct extent_record *rec = NULL;
7639         u64 bytenr;
7640         u32 size;
7641         u64 parent;
7642         u64 owner;
7643         u64 flags;
7644         u64 ptr;
7645         u64 gen = 0;
7646         int ret = 0;
7647         int i;
7648         int nritems;
7649         struct btrfs_key key;
7650         struct cache_extent *cache;
7651         int reada_bits;
7652
7653         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7654                                     bits_nr, &reada_bits);
7655         if (nritems == 0)
7656                 return 1;
7657
7658         if (!reada_bits) {
7659                 for(i = 0; i < nritems; i++) {
7660                         ret = add_cache_extent(reada, bits[i].start,
7661                                                bits[i].size);
7662                         if (ret == -EEXIST)
7663                                 continue;
7664
7665                         /* fixme, get the parent transid */
7666                         readahead_tree_block(fs_info, bits[i].start, 0);
7667                 }
7668         }
7669         *last = bits[0].start;
7670         bytenr = bits[0].start;
7671         size = bits[0].size;
7672
7673         cache = lookup_cache_extent(pending, bytenr, size);
7674         if (cache) {
7675                 remove_cache_extent(pending, cache);
7676                 free(cache);
7677         }
7678         cache = lookup_cache_extent(reada, bytenr, size);
7679         if (cache) {
7680                 remove_cache_extent(reada, cache);
7681                 free(cache);
7682         }
7683         cache = lookup_cache_extent(nodes, bytenr, size);
7684         if (cache) {
7685                 remove_cache_extent(nodes, cache);
7686                 free(cache);
7687         }
7688         cache = lookup_cache_extent(extent_cache, bytenr, size);
7689         if (cache) {
7690                 rec = container_of(cache, struct extent_record, cache);
7691                 gen = rec->parent_generation;
7692         }
7693
7694         /* fixme, get the real parent transid */
7695         buf = read_tree_block(root->fs_info, bytenr, gen);
7696         if (!extent_buffer_uptodate(buf)) {
7697                 record_bad_block_io(root->fs_info,
7698                                     extent_cache, bytenr, size);
7699                 goto out;
7700         }
7701
7702         nritems = btrfs_header_nritems(buf);
7703
7704         flags = 0;
7705         if (!init_extent_tree) {
7706                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7707                                        btrfs_header_level(buf), 1, NULL,
7708                                        &flags);
7709                 if (ret < 0) {
7710                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7711                         if (ret < 0) {
7712                                 fprintf(stderr, "Couldn't calc extent flags\n");
7713                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7714                         }
7715                 }
7716         } else {
7717                 flags = 0;
7718                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7719                 if (ret < 0) {
7720                         fprintf(stderr, "Couldn't calc extent flags\n");
7721                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7722                 }
7723         }
7724
7725         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7726                 if (ri != NULL &&
7727                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7728                     ri->objectid == btrfs_header_owner(buf)) {
7729                         /*
7730                          * Ok we got to this block from it's original owner and
7731                          * we have FULL_BACKREF set.  Relocation can leave
7732                          * converted blocks over so this is altogether possible,
7733                          * however it's not possible if the generation > the
7734                          * last snapshot, so check for this case.
7735                          */
7736                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7737                             btrfs_header_generation(buf) > ri->last_snapshot) {
7738                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7739                                 rec->bad_full_backref = 1;
7740                         }
7741                 }
7742         } else {
7743                 if (ri != NULL &&
7744                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7745                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7746                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7747                         rec->bad_full_backref = 1;
7748                 }
7749         }
7750
7751         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7752                 rec->flag_block_full_backref = 1;
7753                 parent = bytenr;
7754                 owner = 0;
7755         } else {
7756                 rec->flag_block_full_backref = 0;
7757                 parent = 0;
7758                 owner = btrfs_header_owner(buf);
7759         }
7760
7761         ret = check_block(root, extent_cache, buf, flags);
7762         if (ret)
7763                 goto out;
7764
7765         if (btrfs_is_leaf(buf)) {
7766                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7767                 for (i = 0; i < nritems; i++) {
7768                         struct btrfs_file_extent_item *fi;
7769                         btrfs_item_key_to_cpu(buf, &key, i);
7770                         /*
7771                          * Check key type against the leaf owner.
7772                          * Could filter quite a lot of early error if
7773                          * owner is correct
7774                          */
7775                         if (check_type_with_root(btrfs_header_owner(buf),
7776                                                  key.type)) {
7777                                 fprintf(stderr, "ignoring invalid key\n");
7778                                 continue;
7779                         }
7780                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7781                                 process_extent_item(root, extent_cache, buf,
7782                                                     i);
7783                                 continue;
7784                         }
7785                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7786                                 process_extent_item(root, extent_cache, buf,
7787                                                     i);
7788                                 continue;
7789                         }
7790                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7791                                 total_csum_bytes +=
7792                                         btrfs_item_size_nr(buf, i);
7793                                 continue;
7794                         }
7795                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7796                                 process_chunk_item(chunk_cache, &key, buf, i);
7797                                 continue;
7798                         }
7799                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7800                                 process_device_item(dev_cache, &key, buf, i);
7801                                 continue;
7802                         }
7803                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7804                                 process_block_group_item(block_group_cache,
7805                                         &key, buf, i);
7806                                 continue;
7807                         }
7808                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7809                                 process_device_extent_item(dev_extent_cache,
7810                                         &key, buf, i);
7811                                 continue;
7812
7813                         }
7814                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7815 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7816                                 process_extent_ref_v0(extent_cache, buf, i);
7817 #else
7818                                 BUG();
7819 #endif
7820                                 continue;
7821                         }
7822
7823                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7824                                 ret = add_tree_backref(extent_cache,
7825                                                 key.objectid, 0, key.offset, 0);
7826                                 if (ret < 0)
7827                                         error(
7828                                 "add_tree_backref failed (leaf tree block): %s",
7829                                               strerror(-ret));
7830                                 continue;
7831                         }
7832                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7833                                 ret = add_tree_backref(extent_cache,
7834                                                 key.objectid, key.offset, 0, 0);
7835                                 if (ret < 0)
7836                                         error(
7837                                 "add_tree_backref failed (leaf shared block): %s",
7838                                               strerror(-ret));
7839                                 continue;
7840                         }
7841                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7842                                 struct btrfs_extent_data_ref *ref;
7843                                 ref = btrfs_item_ptr(buf, i,
7844                                                 struct btrfs_extent_data_ref);
7845                                 add_data_backref(extent_cache,
7846                                         key.objectid, 0,
7847                                         btrfs_extent_data_ref_root(buf, ref),
7848                                         btrfs_extent_data_ref_objectid(buf,
7849                                                                        ref),
7850                                         btrfs_extent_data_ref_offset(buf, ref),
7851                                         btrfs_extent_data_ref_count(buf, ref),
7852                                         0, root->fs_info->sectorsize);
7853                                 continue;
7854                         }
7855                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7856                                 struct btrfs_shared_data_ref *ref;
7857                                 ref = btrfs_item_ptr(buf, i,
7858                                                 struct btrfs_shared_data_ref);
7859                                 add_data_backref(extent_cache,
7860                                         key.objectid, key.offset, 0, 0, 0,
7861                                         btrfs_shared_data_ref_count(buf, ref),
7862                                         0, root->fs_info->sectorsize);
7863                                 continue;
7864                         }
7865                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7866                                 struct bad_item *bad;
7867
7868                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7869                                         continue;
7870                                 if (!owner)
7871                                         continue;
7872                                 bad = malloc(sizeof(struct bad_item));
7873                                 if (!bad)
7874                                         continue;
7875                                 INIT_LIST_HEAD(&bad->list);
7876                                 memcpy(&bad->key, &key,
7877                                        sizeof(struct btrfs_key));
7878                                 bad->root_id = owner;
7879                                 list_add_tail(&bad->list, &delete_items);
7880                                 continue;
7881                         }
7882                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7883                                 continue;
7884                         fi = btrfs_item_ptr(buf, i,
7885                                             struct btrfs_file_extent_item);
7886                         if (btrfs_file_extent_type(buf, fi) ==
7887                             BTRFS_FILE_EXTENT_INLINE)
7888                                 continue;
7889                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7890                                 continue;
7891
7892                         data_bytes_allocated +=
7893                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7894                         if (data_bytes_allocated < root->fs_info->sectorsize) {
7895                                 abort();
7896                         }
7897                         data_bytes_referenced +=
7898                                 btrfs_file_extent_num_bytes(buf, fi);
7899                         add_data_backref(extent_cache,
7900                                 btrfs_file_extent_disk_bytenr(buf, fi),
7901                                 parent, owner, key.objectid, key.offset -
7902                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7903                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7904                 }
7905         } else {
7906                 int level;
7907                 struct btrfs_key first_key;
7908
7909                 first_key.objectid = 0;
7910
7911                 if (nritems > 0)
7912                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7913                 level = btrfs_header_level(buf);
7914                 for (i = 0; i < nritems; i++) {
7915                         struct extent_record tmpl;
7916
7917                         ptr = btrfs_node_blockptr(buf, i);
7918                         size = root->fs_info->nodesize;
7919                         btrfs_node_key_to_cpu(buf, &key, i);
7920                         if (ri != NULL) {
7921                                 if ((level == ri->drop_level)
7922                                     && is_dropped_key(&key, &ri->drop_key)) {
7923                                         continue;
7924                                 }
7925                         }
7926
7927                         memset(&tmpl, 0, sizeof(tmpl));
7928                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7929                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7930                         tmpl.start = ptr;
7931                         tmpl.nr = size;
7932                         tmpl.refs = 1;
7933                         tmpl.metadata = 1;
7934                         tmpl.max_size = size;
7935                         ret = add_extent_rec(extent_cache, &tmpl);
7936                         if (ret < 0)
7937                                 goto out;
7938
7939                         ret = add_tree_backref(extent_cache, ptr, parent,
7940                                         owner, 1);
7941                         if (ret < 0) {
7942                                 error(
7943                                 "add_tree_backref failed (non-leaf block): %s",
7944                                       strerror(-ret));
7945                                 continue;
7946                         }
7947
7948                         if (level > 1) {
7949                                 add_pending(nodes, seen, ptr, size);
7950                         } else {
7951                                 add_pending(pending, seen, ptr, size);
7952                         }
7953                 }
7954                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7955                                       nritems) * sizeof(struct btrfs_key_ptr);
7956         }
7957         total_btree_bytes += buf->len;
7958         if (fs_root_objectid(btrfs_header_owner(buf)))
7959                 total_fs_tree_bytes += buf->len;
7960         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7961                 total_extent_tree_bytes += buf->len;
7962 out:
7963         free_extent_buffer(buf);
7964         return ret;
7965 }
7966
7967 static int add_root_to_pending(struct extent_buffer *buf,
7968                                struct cache_tree *extent_cache,
7969                                struct cache_tree *pending,
7970                                struct cache_tree *seen,
7971                                struct cache_tree *nodes,
7972                                u64 objectid)
7973 {
7974         struct extent_record tmpl;
7975         int ret;
7976
7977         if (btrfs_header_level(buf) > 0)
7978                 add_pending(nodes, seen, buf->start, buf->len);
7979         else
7980                 add_pending(pending, seen, buf->start, buf->len);
7981
7982         memset(&tmpl, 0, sizeof(tmpl));
7983         tmpl.start = buf->start;
7984         tmpl.nr = buf->len;
7985         tmpl.is_root = 1;
7986         tmpl.refs = 1;
7987         tmpl.metadata = 1;
7988         tmpl.max_size = buf->len;
7989         add_extent_rec(extent_cache, &tmpl);
7990
7991         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7992             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7993                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7994                                 0, 1);
7995         else
7996                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7997                                 1);
7998         return ret;
7999 }
8000
8001 /* as we fix the tree, we might be deleting blocks that
8002  * we're tracking for repair.  This hook makes sure we
8003  * remove any backrefs for blocks as we are fixing them.
8004  */
8005 static int free_extent_hook(struct btrfs_trans_handle *trans,
8006                             struct btrfs_root *root,
8007                             u64 bytenr, u64 num_bytes, u64 parent,
8008                             u64 root_objectid, u64 owner, u64 offset,
8009                             int refs_to_drop)
8010 {
8011         struct extent_record *rec;
8012         struct cache_extent *cache;
8013         int is_data;
8014         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8015
8016         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8017         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8018         if (!cache)
8019                 return 0;
8020
8021         rec = container_of(cache, struct extent_record, cache);
8022         if (is_data) {
8023                 struct data_backref *back;
8024                 back = find_data_backref(rec, parent, root_objectid, owner,
8025                                          offset, 1, bytenr, num_bytes);
8026                 if (!back)
8027                         goto out;
8028                 if (back->node.found_ref) {
8029                         back->found_ref -= refs_to_drop;
8030                         if (rec->refs)
8031                                 rec->refs -= refs_to_drop;
8032                 }
8033                 if (back->node.found_extent_tree) {
8034                         back->num_refs -= refs_to_drop;
8035                         if (rec->extent_item_refs)
8036                                 rec->extent_item_refs -= refs_to_drop;
8037                 }
8038                 if (back->found_ref == 0)
8039                         back->node.found_ref = 0;
8040                 if (back->num_refs == 0)
8041                         back->node.found_extent_tree = 0;
8042
8043                 if (!back->node.found_extent_tree && back->node.found_ref) {
8044                         list_del(&back->node.list);
8045                         free(back);
8046                 }
8047         } else {
8048                 struct tree_backref *back;
8049                 back = find_tree_backref(rec, parent, root_objectid);
8050                 if (!back)
8051                         goto out;
8052                 if (back->node.found_ref) {
8053                         if (rec->refs)
8054                                 rec->refs--;
8055                         back->node.found_ref = 0;
8056                 }
8057                 if (back->node.found_extent_tree) {
8058                         if (rec->extent_item_refs)
8059                                 rec->extent_item_refs--;
8060                         back->node.found_extent_tree = 0;
8061                 }
8062                 if (!back->node.found_extent_tree && back->node.found_ref) {
8063                         list_del(&back->node.list);
8064                         free(back);
8065                 }
8066         }
8067         maybe_free_extent_rec(extent_cache, rec);
8068 out:
8069         return 0;
8070 }
8071
8072 static int delete_extent_records(struct btrfs_trans_handle *trans,
8073                                  struct btrfs_root *root,
8074                                  struct btrfs_path *path,
8075                                  u64 bytenr)
8076 {
8077         struct btrfs_key key;
8078         struct btrfs_key found_key;
8079         struct extent_buffer *leaf;
8080         int ret;
8081         int slot;
8082
8083
8084         key.objectid = bytenr;
8085         key.type = (u8)-1;
8086         key.offset = (u64)-1;
8087
8088         while(1) {
8089                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8090                                         &key, path, 0, 1);
8091                 if (ret < 0)
8092                         break;
8093
8094                 if (ret > 0) {
8095                         ret = 0;
8096                         if (path->slots[0] == 0)
8097                                 break;
8098                         path->slots[0]--;
8099                 }
8100                 ret = 0;
8101
8102                 leaf = path->nodes[0];
8103                 slot = path->slots[0];
8104
8105                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8106                 if (found_key.objectid != bytenr)
8107                         break;
8108
8109                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8110                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8111                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8112                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8113                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8114                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8115                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8116                         btrfs_release_path(path);
8117                         if (found_key.type == 0) {
8118                                 if (found_key.offset == 0)
8119                                         break;
8120                                 key.offset = found_key.offset - 1;
8121                                 key.type = found_key.type;
8122                         }
8123                         key.type = found_key.type - 1;
8124                         key.offset = (u64)-1;
8125                         continue;
8126                 }
8127
8128                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8129                         found_key.objectid, found_key.type, found_key.offset);
8130
8131                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8132                 if (ret)
8133                         break;
8134                 btrfs_release_path(path);
8135
8136                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8137                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8138                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8139                                 found_key.offset : root->fs_info->nodesize;
8140
8141                         ret = btrfs_update_block_group(trans, root, bytenr,
8142                                                        bytes, 0, 0);
8143                         if (ret)
8144                                 break;
8145                 }
8146         }
8147
8148         btrfs_release_path(path);
8149         return ret;
8150 }
8151
8152 /*
8153  * for a single backref, this will allocate a new extent
8154  * and add the backref to it.
8155  */
8156 static int record_extent(struct btrfs_trans_handle *trans,
8157                          struct btrfs_fs_info *info,
8158                          struct btrfs_path *path,
8159                          struct extent_record *rec,
8160                          struct extent_backref *back,
8161                          int allocated, u64 flags)
8162 {
8163         int ret = 0;
8164         struct btrfs_root *extent_root = info->extent_root;
8165         struct extent_buffer *leaf;
8166         struct btrfs_key ins_key;
8167         struct btrfs_extent_item *ei;
8168         struct data_backref *dback;
8169         struct btrfs_tree_block_info *bi;
8170
8171         if (!back->is_data)
8172                 rec->max_size = max_t(u64, rec->max_size,
8173                                     info->nodesize);
8174
8175         if (!allocated) {
8176                 u32 item_size = sizeof(*ei);
8177
8178                 if (!back->is_data)
8179                         item_size += sizeof(*bi);
8180
8181                 ins_key.objectid = rec->start;
8182                 ins_key.offset = rec->max_size;
8183                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8184
8185                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8186                                         &ins_key, item_size);
8187                 if (ret)
8188                         goto fail;
8189
8190                 leaf = path->nodes[0];
8191                 ei = btrfs_item_ptr(leaf, path->slots[0],
8192                                     struct btrfs_extent_item);
8193
8194                 btrfs_set_extent_refs(leaf, ei, 0);
8195                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8196
8197                 if (back->is_data) {
8198                         btrfs_set_extent_flags(leaf, ei,
8199                                                BTRFS_EXTENT_FLAG_DATA);
8200                 } else {
8201                         struct btrfs_disk_key copy_key;;
8202
8203                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8204                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8205                                              sizeof(*bi));
8206
8207                         btrfs_set_disk_key_objectid(&copy_key,
8208                                                     rec->info_objectid);
8209                         btrfs_set_disk_key_type(&copy_key, 0);
8210                         btrfs_set_disk_key_offset(&copy_key, 0);
8211
8212                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8213                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8214
8215                         btrfs_set_extent_flags(leaf, ei,
8216                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8217                 }
8218
8219                 btrfs_mark_buffer_dirty(leaf);
8220                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8221                                                rec->max_size, 1, 0);
8222                 if (ret)
8223                         goto fail;
8224                 btrfs_release_path(path);
8225         }
8226
8227         if (back->is_data) {
8228                 u64 parent;
8229                 int i;
8230
8231                 dback = to_data_backref(back);
8232                 if (back->full_backref)
8233                         parent = dback->parent;
8234                 else
8235                         parent = 0;
8236
8237                 for (i = 0; i < dback->found_ref; i++) {
8238                         /* if parent != 0, we're doing a full backref
8239                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8240                          * just makes the backref allocator create a data
8241                          * backref
8242                          */
8243                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8244                                                    rec->start, rec->max_size,
8245                                                    parent,
8246                                                    dback->root,
8247                                                    parent ?
8248                                                    BTRFS_FIRST_FREE_OBJECTID :
8249                                                    dback->owner,
8250                                                    dback->offset);
8251                         if (ret)
8252                                 break;
8253                 }
8254                 fprintf(stderr, "adding new data backref"
8255                                 " on %llu %s %llu owner %llu"
8256                                 " offset %llu found %d\n",
8257                                 (unsigned long long)rec->start,
8258                                 back->full_backref ?
8259                                 "parent" : "root",
8260                                 back->full_backref ?
8261                                 (unsigned long long)parent :
8262                                 (unsigned long long)dback->root,
8263                                 (unsigned long long)dback->owner,
8264                                 (unsigned long long)dback->offset,
8265                                 dback->found_ref);
8266         } else {
8267                 u64 parent;
8268                 struct tree_backref *tback;
8269
8270                 tback = to_tree_backref(back);
8271                 if (back->full_backref)
8272                         parent = tback->parent;
8273                 else
8274                         parent = 0;
8275
8276                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8277                                            rec->start, rec->max_size,
8278                                            parent, tback->root, 0, 0);
8279                 fprintf(stderr, "adding new tree backref on "
8280                         "start %llu len %llu parent %llu root %llu\n",
8281                         rec->start, rec->max_size, parent, tback->root);
8282         }
8283 fail:
8284         btrfs_release_path(path);
8285         return ret;
8286 }
8287
8288 static struct extent_entry *find_entry(struct list_head *entries,
8289                                        u64 bytenr, u64 bytes)
8290 {
8291         struct extent_entry *entry = NULL;
8292
8293         list_for_each_entry(entry, entries, list) {
8294                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8295                         return entry;
8296         }
8297
8298         return NULL;
8299 }
8300
8301 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8302 {
8303         struct extent_entry *entry, *best = NULL, *prev = NULL;
8304
8305         list_for_each_entry(entry, entries, list) {
8306                 /*
8307                  * If there are as many broken entries as entries then we know
8308                  * not to trust this particular entry.
8309                  */
8310                 if (entry->broken == entry->count)
8311                         continue;
8312
8313                 /*
8314                  * Special case, when there are only two entries and 'best' is
8315                  * the first one
8316                  */
8317                 if (!prev) {
8318                         best = entry;
8319                         prev = entry;
8320                         continue;
8321                 }
8322
8323                 /*
8324                  * If our current entry == best then we can't be sure our best
8325                  * is really the best, so we need to keep searching.
8326                  */
8327                 if (best && best->count == entry->count) {
8328                         prev = entry;
8329                         best = NULL;
8330                         continue;
8331                 }
8332
8333                 /* Prev == entry, not good enough, have to keep searching */
8334                 if (!prev->broken && prev->count == entry->count)
8335                         continue;
8336
8337                 if (!best)
8338                         best = (prev->count > entry->count) ? prev : entry;
8339                 else if (best->count < entry->count)
8340                         best = entry;
8341                 prev = entry;
8342         }
8343
8344         return best;
8345 }
8346
8347 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8348                       struct data_backref *dback, struct extent_entry *entry)
8349 {
8350         struct btrfs_trans_handle *trans;
8351         struct btrfs_root *root;
8352         struct btrfs_file_extent_item *fi;
8353         struct extent_buffer *leaf;
8354         struct btrfs_key key;
8355         u64 bytenr, bytes;
8356         int ret, err;
8357
8358         key.objectid = dback->root;
8359         key.type = BTRFS_ROOT_ITEM_KEY;
8360         key.offset = (u64)-1;
8361         root = btrfs_read_fs_root(info, &key);
8362         if (IS_ERR(root)) {
8363                 fprintf(stderr, "Couldn't find root for our ref\n");
8364                 return -EINVAL;
8365         }
8366
8367         /*
8368          * The backref points to the original offset of the extent if it was
8369          * split, so we need to search down to the offset we have and then walk
8370          * forward until we find the backref we're looking for.
8371          */
8372         key.objectid = dback->owner;
8373         key.type = BTRFS_EXTENT_DATA_KEY;
8374         key.offset = dback->offset;
8375         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8376         if (ret < 0) {
8377                 fprintf(stderr, "Error looking up ref %d\n", ret);
8378                 return ret;
8379         }
8380
8381         while (1) {
8382                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8383                         ret = btrfs_next_leaf(root, path);
8384                         if (ret) {
8385                                 fprintf(stderr, "Couldn't find our ref, next\n");
8386                                 return -EINVAL;
8387                         }
8388                 }
8389                 leaf = path->nodes[0];
8390                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8391                 if (key.objectid != dback->owner ||
8392                     key.type != BTRFS_EXTENT_DATA_KEY) {
8393                         fprintf(stderr, "Couldn't find our ref, search\n");
8394                         return -EINVAL;
8395                 }
8396                 fi = btrfs_item_ptr(leaf, path->slots[0],
8397                                     struct btrfs_file_extent_item);
8398                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8399                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8400
8401                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8402                         break;
8403                 path->slots[0]++;
8404         }
8405
8406         btrfs_release_path(path);
8407
8408         trans = btrfs_start_transaction(root, 1);
8409         if (IS_ERR(trans))
8410                 return PTR_ERR(trans);
8411
8412         /*
8413          * Ok we have the key of the file extent we want to fix, now we can cow
8414          * down to the thing and fix it.
8415          */
8416         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8417         if (ret < 0) {
8418                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8419                         key.objectid, key.type, key.offset, ret);
8420                 goto out;
8421         }
8422         if (ret > 0) {
8423                 fprintf(stderr, "Well that's odd, we just found this key "
8424                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8425                         key.offset);
8426                 ret = -EINVAL;
8427                 goto out;
8428         }
8429         leaf = path->nodes[0];
8430         fi = btrfs_item_ptr(leaf, path->slots[0],
8431                             struct btrfs_file_extent_item);
8432
8433         if (btrfs_file_extent_compression(leaf, fi) &&
8434             dback->disk_bytenr != entry->bytenr) {
8435                 fprintf(stderr, "Ref doesn't match the record start and is "
8436                         "compressed, please take a btrfs-image of this file "
8437                         "system and send it to a btrfs developer so they can "
8438                         "complete this functionality for bytenr %Lu\n",
8439                         dback->disk_bytenr);
8440                 ret = -EINVAL;
8441                 goto out;
8442         }
8443
8444         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8445                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8446         } else if (dback->disk_bytenr > entry->bytenr) {
8447                 u64 off_diff, offset;
8448
8449                 off_diff = dback->disk_bytenr - entry->bytenr;
8450                 offset = btrfs_file_extent_offset(leaf, fi);
8451                 if (dback->disk_bytenr + offset +
8452                     btrfs_file_extent_num_bytes(leaf, fi) >
8453                     entry->bytenr + entry->bytes) {
8454                         fprintf(stderr, "Ref is past the entry end, please "
8455                                 "take a btrfs-image of this file system and "
8456                                 "send it to a btrfs developer, ref %Lu\n",
8457                                 dback->disk_bytenr);
8458                         ret = -EINVAL;
8459                         goto out;
8460                 }
8461                 offset += off_diff;
8462                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8463                 btrfs_set_file_extent_offset(leaf, fi, offset);
8464         } else if (dback->disk_bytenr < entry->bytenr) {
8465                 u64 offset;
8466
8467                 offset = btrfs_file_extent_offset(leaf, fi);
8468                 if (dback->disk_bytenr + offset < entry->bytenr) {
8469                         fprintf(stderr, "Ref is before the entry start, please"
8470                                 " take a btrfs-image of this file system and "
8471                                 "send it to a btrfs developer, ref %Lu\n",
8472                                 dback->disk_bytenr);
8473                         ret = -EINVAL;
8474                         goto out;
8475                 }
8476
8477                 offset += dback->disk_bytenr;
8478                 offset -= entry->bytenr;
8479                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8480                 btrfs_set_file_extent_offset(leaf, fi, offset);
8481         }
8482
8483         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8484
8485         /*
8486          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8487          * only do this if we aren't using compression, otherwise it's a
8488          * trickier case.
8489          */
8490         if (!btrfs_file_extent_compression(leaf, fi))
8491                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8492         else
8493                 printf("ram bytes may be wrong?\n");
8494         btrfs_mark_buffer_dirty(leaf);
8495 out:
8496         err = btrfs_commit_transaction(trans, root);
8497         btrfs_release_path(path);
8498         return ret ? ret : err;
8499 }
8500
8501 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8502                            struct extent_record *rec)
8503 {
8504         struct extent_backref *back;
8505         struct data_backref *dback;
8506         struct extent_entry *entry, *best = NULL;
8507         LIST_HEAD(entries);
8508         int nr_entries = 0;
8509         int broken_entries = 0;
8510         int ret = 0;
8511         short mismatch = 0;
8512
8513         /*
8514          * Metadata is easy and the backrefs should always agree on bytenr and
8515          * size, if not we've got bigger issues.
8516          */
8517         if (rec->metadata)
8518                 return 0;
8519
8520         list_for_each_entry(back, &rec->backrefs, list) {
8521                 if (back->full_backref || !back->is_data)
8522                         continue;
8523
8524                 dback = to_data_backref(back);
8525
8526                 /*
8527                  * We only pay attention to backrefs that we found a real
8528                  * backref for.
8529                  */
8530                 if (dback->found_ref == 0)
8531                         continue;
8532
8533                 /*
8534                  * For now we only catch when the bytes don't match, not the
8535                  * bytenr.  We can easily do this at the same time, but I want
8536                  * to have a fs image to test on before we just add repair
8537                  * functionality willy-nilly so we know we won't screw up the
8538                  * repair.
8539                  */
8540
8541                 entry = find_entry(&entries, dback->disk_bytenr,
8542                                    dback->bytes);
8543                 if (!entry) {
8544                         entry = malloc(sizeof(struct extent_entry));
8545                         if (!entry) {
8546                                 ret = -ENOMEM;
8547                                 goto out;
8548                         }
8549                         memset(entry, 0, sizeof(*entry));
8550                         entry->bytenr = dback->disk_bytenr;
8551                         entry->bytes = dback->bytes;
8552                         list_add_tail(&entry->list, &entries);
8553                         nr_entries++;
8554                 }
8555
8556                 /*
8557                  * If we only have on entry we may think the entries agree when
8558                  * in reality they don't so we have to do some extra checking.
8559                  */
8560                 if (dback->disk_bytenr != rec->start ||
8561                     dback->bytes != rec->nr || back->broken)
8562                         mismatch = 1;
8563
8564                 if (back->broken) {
8565                         entry->broken++;
8566                         broken_entries++;
8567                 }
8568
8569                 entry->count++;
8570         }
8571
8572         /* Yay all the backrefs agree, carry on good sir */
8573         if (nr_entries <= 1 && !mismatch)
8574                 goto out;
8575
8576         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8577                 "%Lu\n", rec->start);
8578
8579         /*
8580          * First we want to see if the backrefs can agree amongst themselves who
8581          * is right, so figure out which one of the entries has the highest
8582          * count.
8583          */
8584         best = find_most_right_entry(&entries);
8585
8586         /*
8587          * Ok so we may have an even split between what the backrefs think, so
8588          * this is where we use the extent ref to see what it thinks.
8589          */
8590         if (!best) {
8591                 entry = find_entry(&entries, rec->start, rec->nr);
8592                 if (!entry && (!broken_entries || !rec->found_rec)) {
8593                         fprintf(stderr, "Backrefs don't agree with each other "
8594                                 "and extent record doesn't agree with anybody,"
8595                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8596                                 rec->start, rec->nr);
8597                         ret = -EINVAL;
8598                         goto out;
8599                 } else if (!entry) {
8600                         /*
8601                          * Ok our backrefs were broken, we'll assume this is the
8602                          * correct value and add an entry for this range.
8603                          */
8604                         entry = malloc(sizeof(struct extent_entry));
8605                         if (!entry) {
8606                                 ret = -ENOMEM;
8607                                 goto out;
8608                         }
8609                         memset(entry, 0, sizeof(*entry));
8610                         entry->bytenr = rec->start;
8611                         entry->bytes = rec->nr;
8612                         list_add_tail(&entry->list, &entries);
8613                         nr_entries++;
8614                 }
8615                 entry->count++;
8616                 best = find_most_right_entry(&entries);
8617                 if (!best) {
8618                         fprintf(stderr, "Backrefs and extent record evenly "
8619                                 "split on who is right, this is going to "
8620                                 "require user input to fix bytenr %Lu bytes "
8621                                 "%Lu\n", rec->start, rec->nr);
8622                         ret = -EINVAL;
8623                         goto out;
8624                 }
8625         }
8626
8627         /*
8628          * I don't think this can happen currently as we'll abort() if we catch
8629          * this case higher up, but in case somebody removes that we still can't
8630          * deal with it properly here yet, so just bail out of that's the case.
8631          */
8632         if (best->bytenr != rec->start) {
8633                 fprintf(stderr, "Extent start and backref starts don't match, "
8634                         "please use btrfs-image on this file system and send "
8635                         "it to a btrfs developer so they can make fsck fix "
8636                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8637                         rec->start, rec->nr);
8638                 ret = -EINVAL;
8639                 goto out;
8640         }
8641
8642         /*
8643          * Ok great we all agreed on an extent record, let's go find the real
8644          * references and fix up the ones that don't match.
8645          */
8646         list_for_each_entry(back, &rec->backrefs, list) {
8647                 if (back->full_backref || !back->is_data)
8648                         continue;
8649
8650                 dback = to_data_backref(back);
8651
8652                 /*
8653                  * Still ignoring backrefs that don't have a real ref attached
8654                  * to them.
8655                  */
8656                 if (dback->found_ref == 0)
8657                         continue;
8658
8659                 if (dback->bytes == best->bytes &&
8660                     dback->disk_bytenr == best->bytenr)
8661                         continue;
8662
8663                 ret = repair_ref(info, path, dback, best);
8664                 if (ret)
8665                         goto out;
8666         }
8667
8668         /*
8669          * Ok we messed with the actual refs, which means we need to drop our
8670          * entire cache and go back and rescan.  I know this is a huge pain and
8671          * adds a lot of extra work, but it's the only way to be safe.  Once all
8672          * the backrefs agree we may not need to do anything to the extent
8673          * record itself.
8674          */
8675         ret = -EAGAIN;
8676 out:
8677         while (!list_empty(&entries)) {
8678                 entry = list_entry(entries.next, struct extent_entry, list);
8679                 list_del_init(&entry->list);
8680                 free(entry);
8681         }
8682         return ret;
8683 }
8684
8685 static int process_duplicates(struct cache_tree *extent_cache,
8686                               struct extent_record *rec)
8687 {
8688         struct extent_record *good, *tmp;
8689         struct cache_extent *cache;
8690         int ret;
8691
8692         /*
8693          * If we found a extent record for this extent then return, or if we
8694          * have more than one duplicate we are likely going to need to delete
8695          * something.
8696          */
8697         if (rec->found_rec || rec->num_duplicates > 1)
8698                 return 0;
8699
8700         /* Shouldn't happen but just in case */
8701         BUG_ON(!rec->num_duplicates);
8702
8703         /*
8704          * So this happens if we end up with a backref that doesn't match the
8705          * actual extent entry.  So either the backref is bad or the extent
8706          * entry is bad.  Either way we want to have the extent_record actually
8707          * reflect what we found in the extent_tree, so we need to take the
8708          * duplicate out and use that as the extent_record since the only way we
8709          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8710          */
8711         remove_cache_extent(extent_cache, &rec->cache);
8712
8713         good = to_extent_record(rec->dups.next);
8714         list_del_init(&good->list);
8715         INIT_LIST_HEAD(&good->backrefs);
8716         INIT_LIST_HEAD(&good->dups);
8717         good->cache.start = good->start;
8718         good->cache.size = good->nr;
8719         good->content_checked = 0;
8720         good->owner_ref_checked = 0;
8721         good->num_duplicates = 0;
8722         good->refs = rec->refs;
8723         list_splice_init(&rec->backrefs, &good->backrefs);
8724         while (1) {
8725                 cache = lookup_cache_extent(extent_cache, good->start,
8726                                             good->nr);
8727                 if (!cache)
8728                         break;
8729                 tmp = container_of(cache, struct extent_record, cache);
8730
8731                 /*
8732                  * If we find another overlapping extent and it's found_rec is
8733                  * set then it's a duplicate and we need to try and delete
8734                  * something.
8735                  */
8736                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8737                         if (list_empty(&good->list))
8738                                 list_add_tail(&good->list,
8739                                               &duplicate_extents);
8740                         good->num_duplicates += tmp->num_duplicates + 1;
8741                         list_splice_init(&tmp->dups, &good->dups);
8742                         list_del_init(&tmp->list);
8743                         list_add_tail(&tmp->list, &good->dups);
8744                         remove_cache_extent(extent_cache, &tmp->cache);
8745                         continue;
8746                 }
8747
8748                 /*
8749                  * Ok we have another non extent item backed extent rec, so lets
8750                  * just add it to this extent and carry on like we did above.
8751                  */
8752                 good->refs += tmp->refs;
8753                 list_splice_init(&tmp->backrefs, &good->backrefs);
8754                 remove_cache_extent(extent_cache, &tmp->cache);
8755                 free(tmp);
8756         }
8757         ret = insert_cache_extent(extent_cache, &good->cache);
8758         BUG_ON(ret);
8759         free(rec);
8760         return good->num_duplicates ? 0 : 1;
8761 }
8762
8763 static int delete_duplicate_records(struct btrfs_root *root,
8764                                     struct extent_record *rec)
8765 {
8766         struct btrfs_trans_handle *trans;
8767         LIST_HEAD(delete_list);
8768         struct btrfs_path path;
8769         struct extent_record *tmp, *good, *n;
8770         int nr_del = 0;
8771         int ret = 0, err;
8772         struct btrfs_key key;
8773
8774         btrfs_init_path(&path);
8775
8776         good = rec;
8777         /* Find the record that covers all of the duplicates. */
8778         list_for_each_entry(tmp, &rec->dups, list) {
8779                 if (good->start < tmp->start)
8780                         continue;
8781                 if (good->nr > tmp->nr)
8782                         continue;
8783
8784                 if (tmp->start + tmp->nr < good->start + good->nr) {
8785                         fprintf(stderr, "Ok we have overlapping extents that "
8786                                 "aren't completely covered by each other, this "
8787                                 "is going to require more careful thought.  "
8788                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8789                                 tmp->start, tmp->nr, good->start, good->nr);
8790                         abort();
8791                 }
8792                 good = tmp;
8793         }
8794
8795         if (good != rec)
8796                 list_add_tail(&rec->list, &delete_list);
8797
8798         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8799                 if (tmp == good)
8800                         continue;
8801                 list_move_tail(&tmp->list, &delete_list);
8802         }
8803
8804         root = root->fs_info->extent_root;
8805         trans = btrfs_start_transaction(root, 1);
8806         if (IS_ERR(trans)) {
8807                 ret = PTR_ERR(trans);
8808                 goto out;
8809         }
8810
8811         list_for_each_entry(tmp, &delete_list, list) {
8812                 if (tmp->found_rec == 0)
8813                         continue;
8814                 key.objectid = tmp->start;
8815                 key.type = BTRFS_EXTENT_ITEM_KEY;
8816                 key.offset = tmp->nr;
8817
8818                 /* Shouldn't happen but just in case */
8819                 if (tmp->metadata) {
8820                         fprintf(stderr, "Well this shouldn't happen, extent "
8821                                 "record overlaps but is metadata? "
8822                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8823                         abort();
8824                 }
8825
8826                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8827                 if (ret) {
8828                         if (ret > 0)
8829                                 ret = -EINVAL;
8830                         break;
8831                 }
8832                 ret = btrfs_del_item(trans, root, &path);
8833                 if (ret)
8834                         break;
8835                 btrfs_release_path(&path);
8836                 nr_del++;
8837         }
8838         err = btrfs_commit_transaction(trans, root);
8839         if (err && !ret)
8840                 ret = err;
8841 out:
8842         while (!list_empty(&delete_list)) {
8843                 tmp = to_extent_record(delete_list.next);
8844                 list_del_init(&tmp->list);
8845                 if (tmp == rec)
8846                         continue;
8847                 free(tmp);
8848         }
8849
8850         while (!list_empty(&rec->dups)) {
8851                 tmp = to_extent_record(rec->dups.next);
8852                 list_del_init(&tmp->list);
8853                 free(tmp);
8854         }
8855
8856         btrfs_release_path(&path);
8857
8858         if (!ret && !nr_del)
8859                 rec->num_duplicates = 0;
8860
8861         return ret ? ret : nr_del;
8862 }
8863
8864 static int find_possible_backrefs(struct btrfs_fs_info *info,
8865                                   struct btrfs_path *path,
8866                                   struct cache_tree *extent_cache,
8867                                   struct extent_record *rec)
8868 {
8869         struct btrfs_root *root;
8870         struct extent_backref *back;
8871         struct data_backref *dback;
8872         struct cache_extent *cache;
8873         struct btrfs_file_extent_item *fi;
8874         struct btrfs_key key;
8875         u64 bytenr, bytes;
8876         int ret;
8877
8878         list_for_each_entry(back, &rec->backrefs, list) {
8879                 /* Don't care about full backrefs (poor unloved backrefs) */
8880                 if (back->full_backref || !back->is_data)
8881                         continue;
8882
8883                 dback = to_data_backref(back);
8884
8885                 /* We found this one, we don't need to do a lookup */
8886                 if (dback->found_ref)
8887                         continue;
8888
8889                 key.objectid = dback->root;
8890                 key.type = BTRFS_ROOT_ITEM_KEY;
8891                 key.offset = (u64)-1;
8892
8893                 root = btrfs_read_fs_root(info, &key);
8894
8895                 /* No root, definitely a bad ref, skip */
8896                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8897                         continue;
8898                 /* Other err, exit */
8899                 if (IS_ERR(root))
8900                         return PTR_ERR(root);
8901
8902                 key.objectid = dback->owner;
8903                 key.type = BTRFS_EXTENT_DATA_KEY;
8904                 key.offset = dback->offset;
8905                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8906                 if (ret) {
8907                         btrfs_release_path(path);
8908                         if (ret < 0)
8909                                 return ret;
8910                         /* Didn't find it, we can carry on */
8911                         ret = 0;
8912                         continue;
8913                 }
8914
8915                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8916                                     struct btrfs_file_extent_item);
8917                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8918                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8919                 btrfs_release_path(path);
8920                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8921                 if (cache) {
8922                         struct extent_record *tmp;
8923                         tmp = container_of(cache, struct extent_record, cache);
8924
8925                         /*
8926                          * If we found an extent record for the bytenr for this
8927                          * particular backref then we can't add it to our
8928                          * current extent record.  We only want to add backrefs
8929                          * that don't have a corresponding extent item in the
8930                          * extent tree since they likely belong to this record
8931                          * and we need to fix it if it doesn't match bytenrs.
8932                          */
8933                         if  (tmp->found_rec)
8934                                 continue;
8935                 }
8936
8937                 dback->found_ref += 1;
8938                 dback->disk_bytenr = bytenr;
8939                 dback->bytes = bytes;
8940
8941                 /*
8942                  * Set this so the verify backref code knows not to trust the
8943                  * values in this backref.
8944                  */
8945                 back->broken = 1;
8946         }
8947
8948         return 0;
8949 }
8950
8951 /*
8952  * Record orphan data ref into corresponding root.
8953  *
8954  * Return 0 if the extent item contains data ref and recorded.
8955  * Return 1 if the extent item contains no useful data ref
8956  *   On that case, it may contains only shared_dataref or metadata backref
8957  *   or the file extent exists(this should be handled by the extent bytenr
8958  *   recovery routine)
8959  * Return <0 if something goes wrong.
8960  */
8961 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8962                                       struct extent_record *rec)
8963 {
8964         struct btrfs_key key;
8965         struct btrfs_root *dest_root;
8966         struct extent_backref *back;
8967         struct data_backref *dback;
8968         struct orphan_data_extent *orphan;
8969         struct btrfs_path path;
8970         int recorded_data_ref = 0;
8971         int ret = 0;
8972
8973         if (rec->metadata)
8974                 return 1;
8975         btrfs_init_path(&path);
8976         list_for_each_entry(back, &rec->backrefs, list) {
8977                 if (back->full_backref || !back->is_data ||
8978                     !back->found_extent_tree)
8979                         continue;
8980                 dback = to_data_backref(back);
8981                 if (dback->found_ref)
8982                         continue;
8983                 key.objectid = dback->root;
8984                 key.type = BTRFS_ROOT_ITEM_KEY;
8985                 key.offset = (u64)-1;
8986
8987                 dest_root = btrfs_read_fs_root(fs_info, &key);
8988
8989                 /* For non-exist root we just skip it */
8990                 if (IS_ERR(dest_root) || !dest_root)
8991                         continue;
8992
8993                 key.objectid = dback->owner;
8994                 key.type = BTRFS_EXTENT_DATA_KEY;
8995                 key.offset = dback->offset;
8996
8997                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8998                 btrfs_release_path(&path);
8999                 /*
9000                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9001                  * we need to record it for inode/file extent rebuild.
9002                  * For ret > 0, we record it only for file extent rebuild.
9003                  * For ret == 0, the file extent exists but only bytenr
9004                  * mismatch, let the original bytenr fix routine to handle,
9005                  * don't record it.
9006                  */
9007                 if (ret == 0)
9008                         continue;
9009                 ret = 0;
9010                 orphan = malloc(sizeof(*orphan));
9011                 if (!orphan) {
9012                         ret = -ENOMEM;
9013                         goto out;
9014                 }
9015                 INIT_LIST_HEAD(&orphan->list);
9016                 orphan->root = dback->root;
9017                 orphan->objectid = dback->owner;
9018                 orphan->offset = dback->offset;
9019                 orphan->disk_bytenr = rec->cache.start;
9020                 orphan->disk_len = rec->cache.size;
9021                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9022                 recorded_data_ref = 1;
9023         }
9024 out:
9025         btrfs_release_path(&path);
9026         if (!ret)
9027                 return !recorded_data_ref;
9028         else
9029                 return ret;
9030 }
9031
9032 /*
9033  * when an incorrect extent item is found, this will delete
9034  * all of the existing entries for it and recreate them
9035  * based on what the tree scan found.
9036  */
9037 static int fixup_extent_refs(struct btrfs_fs_info *info,
9038                              struct cache_tree *extent_cache,
9039                              struct extent_record *rec)
9040 {
9041         struct btrfs_trans_handle *trans = NULL;
9042         int ret;
9043         struct btrfs_path path;
9044         struct list_head *cur = rec->backrefs.next;
9045         struct cache_extent *cache;
9046         struct extent_backref *back;
9047         int allocated = 0;
9048         u64 flags = 0;
9049
9050         if (rec->flag_block_full_backref)
9051                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9052
9053         btrfs_init_path(&path);
9054         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9055                 /*
9056                  * Sometimes the backrefs themselves are so broken they don't
9057                  * get attached to any meaningful rec, so first go back and
9058                  * check any of our backrefs that we couldn't find and throw
9059                  * them into the list if we find the backref so that
9060                  * verify_backrefs can figure out what to do.
9061                  */
9062                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9063                 if (ret < 0)
9064                         goto out;
9065         }
9066
9067         /* step one, make sure all of the backrefs agree */
9068         ret = verify_backrefs(info, &path, rec);
9069         if (ret < 0)
9070                 goto out;
9071
9072         trans = btrfs_start_transaction(info->extent_root, 1);
9073         if (IS_ERR(trans)) {
9074                 ret = PTR_ERR(trans);
9075                 goto out;
9076         }
9077
9078         /* step two, delete all the existing records */
9079         ret = delete_extent_records(trans, info->extent_root, &path,
9080                                     rec->start);
9081
9082         if (ret < 0)
9083                 goto out;
9084
9085         /* was this block corrupt?  If so, don't add references to it */
9086         cache = lookup_cache_extent(info->corrupt_blocks,
9087                                     rec->start, rec->max_size);
9088         if (cache) {
9089                 ret = 0;
9090                 goto out;
9091         }
9092
9093         /* step three, recreate all the refs we did find */
9094         while(cur != &rec->backrefs) {
9095                 back = to_extent_backref(cur);
9096                 cur = cur->next;
9097
9098                 /*
9099                  * if we didn't find any references, don't create a
9100                  * new extent record
9101                  */
9102                 if (!back->found_ref)
9103                         continue;
9104
9105                 rec->bad_full_backref = 0;
9106                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9107                 allocated = 1;
9108
9109                 if (ret)
9110                         goto out;
9111         }
9112 out:
9113         if (trans) {
9114                 int err = btrfs_commit_transaction(trans, info->extent_root);
9115                 if (!ret)
9116                         ret = err;
9117         }
9118
9119         if (!ret)
9120                 fprintf(stderr, "Repaired extent references for %llu\n",
9121                                 (unsigned long long)rec->start);
9122
9123         btrfs_release_path(&path);
9124         return ret;
9125 }
9126
9127 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9128                               struct extent_record *rec)
9129 {
9130         struct btrfs_trans_handle *trans;
9131         struct btrfs_root *root = fs_info->extent_root;
9132         struct btrfs_path path;
9133         struct btrfs_extent_item *ei;
9134         struct btrfs_key key;
9135         u64 flags;
9136         int ret = 0;
9137
9138         key.objectid = rec->start;
9139         if (rec->metadata) {
9140                 key.type = BTRFS_METADATA_ITEM_KEY;
9141                 key.offset = rec->info_level;
9142         } else {
9143                 key.type = BTRFS_EXTENT_ITEM_KEY;
9144                 key.offset = rec->max_size;
9145         }
9146
9147         trans = btrfs_start_transaction(root, 0);
9148         if (IS_ERR(trans))
9149                 return PTR_ERR(trans);
9150
9151         btrfs_init_path(&path);
9152         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9153         if (ret < 0) {
9154                 btrfs_release_path(&path);
9155                 btrfs_commit_transaction(trans, root);
9156                 return ret;
9157         } else if (ret) {
9158                 fprintf(stderr, "Didn't find extent for %llu\n",
9159                         (unsigned long long)rec->start);
9160                 btrfs_release_path(&path);
9161                 btrfs_commit_transaction(trans, root);
9162                 return -ENOENT;
9163         }
9164
9165         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9166                             struct btrfs_extent_item);
9167         flags = btrfs_extent_flags(path.nodes[0], ei);
9168         if (rec->flag_block_full_backref) {
9169                 fprintf(stderr, "setting full backref on %llu\n",
9170                         (unsigned long long)key.objectid);
9171                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9172         } else {
9173                 fprintf(stderr, "clearing full backref on %llu\n",
9174                         (unsigned long long)key.objectid);
9175                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9176         }
9177         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9178         btrfs_mark_buffer_dirty(path.nodes[0]);
9179         btrfs_release_path(&path);
9180         ret = btrfs_commit_transaction(trans, root);
9181         if (!ret)
9182                 fprintf(stderr, "Repaired extent flags for %llu\n",
9183                                 (unsigned long long)rec->start);
9184
9185         return ret;
9186 }
9187
9188 /* right now we only prune from the extent allocation tree */
9189 static int prune_one_block(struct btrfs_trans_handle *trans,
9190                            struct btrfs_fs_info *info,
9191                            struct btrfs_corrupt_block *corrupt)
9192 {
9193         int ret;
9194         struct btrfs_path path;
9195         struct extent_buffer *eb;
9196         u64 found;
9197         int slot;
9198         int nritems;
9199         int level = corrupt->level + 1;
9200
9201         btrfs_init_path(&path);
9202 again:
9203         /* we want to stop at the parent to our busted block */
9204         path.lowest_level = level;
9205
9206         ret = btrfs_search_slot(trans, info->extent_root,
9207                                 &corrupt->key, &path, -1, 1);
9208
9209         if (ret < 0)
9210                 goto out;
9211
9212         eb = path.nodes[level];
9213         if (!eb) {
9214                 ret = -ENOENT;
9215                 goto out;
9216         }
9217
9218         /*
9219          * hopefully the search gave us the block we want to prune,
9220          * lets try that first
9221          */
9222         slot = path.slots[level];
9223         found =  btrfs_node_blockptr(eb, slot);
9224         if (found == corrupt->cache.start)
9225                 goto del_ptr;
9226
9227         nritems = btrfs_header_nritems(eb);
9228
9229         /* the search failed, lets scan this node and hope we find it */
9230         for (slot = 0; slot < nritems; slot++) {
9231                 found =  btrfs_node_blockptr(eb, slot);
9232                 if (found == corrupt->cache.start)
9233                         goto del_ptr;
9234         }
9235         /*
9236          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9237          * to this block
9238          */
9239         if (eb == info->extent_root->node) {
9240                 ret = -ENOENT;
9241                 goto out;
9242         } else {
9243                 level++;
9244                 btrfs_release_path(&path);
9245                 goto again;
9246         }
9247
9248 del_ptr:
9249         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9250         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9251
9252 out:
9253         btrfs_release_path(&path);
9254         return ret;
9255 }
9256
9257 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9258 {
9259         struct btrfs_trans_handle *trans = NULL;
9260         struct cache_extent *cache;
9261         struct btrfs_corrupt_block *corrupt;
9262
9263         while (1) {
9264                 cache = search_cache_extent(info->corrupt_blocks, 0);
9265                 if (!cache)
9266                         break;
9267                 if (!trans) {
9268                         trans = btrfs_start_transaction(info->extent_root, 1);
9269                         if (IS_ERR(trans))
9270                                 return PTR_ERR(trans);
9271                 }
9272                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9273                 prune_one_block(trans, info, corrupt);
9274                 remove_cache_extent(info->corrupt_blocks, cache);
9275         }
9276         if (trans)
9277                 return btrfs_commit_transaction(trans, info->extent_root);
9278         return 0;
9279 }
9280
9281 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9282 {
9283         struct btrfs_block_group_cache *cache;
9284         u64 start, end;
9285         int ret;
9286
9287         while (1) {
9288                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9289                                             &start, &end, EXTENT_DIRTY);
9290                 if (ret)
9291                         break;
9292                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9293         }
9294
9295         start = 0;
9296         while (1) {
9297                 cache = btrfs_lookup_first_block_group(fs_info, start);
9298                 if (!cache)
9299                         break;
9300                 if (cache->cached)
9301                         cache->cached = 0;
9302                 start = cache->key.objectid + cache->key.offset;
9303         }
9304 }
9305
9306 static int check_extent_refs(struct btrfs_root *root,
9307                              struct cache_tree *extent_cache)
9308 {
9309         struct extent_record *rec;
9310         struct cache_extent *cache;
9311         int ret = 0;
9312         int had_dups = 0;
9313
9314         if (repair) {
9315                 /*
9316                  * if we're doing a repair, we have to make sure
9317                  * we don't allocate from the problem extents.
9318                  * In the worst case, this will be all the
9319                  * extents in the FS
9320                  */
9321                 cache = search_cache_extent(extent_cache, 0);
9322                 while(cache) {
9323                         rec = container_of(cache, struct extent_record, cache);
9324                         set_extent_dirty(root->fs_info->excluded_extents,
9325                                          rec->start,
9326                                          rec->start + rec->max_size - 1);
9327                         cache = next_cache_extent(cache);
9328                 }
9329
9330                 /* pin down all the corrupted blocks too */
9331                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9332                 while(cache) {
9333                         set_extent_dirty(root->fs_info->excluded_extents,
9334                                          cache->start,
9335                                          cache->start + cache->size - 1);
9336                         cache = next_cache_extent(cache);
9337                 }
9338                 prune_corrupt_blocks(root->fs_info);
9339                 reset_cached_block_groups(root->fs_info);
9340         }
9341
9342         reset_cached_block_groups(root->fs_info);
9343
9344         /*
9345          * We need to delete any duplicate entries we find first otherwise we
9346          * could mess up the extent tree when we have backrefs that actually
9347          * belong to a different extent item and not the weird duplicate one.
9348          */
9349         while (repair && !list_empty(&duplicate_extents)) {
9350                 rec = to_extent_record(duplicate_extents.next);
9351                 list_del_init(&rec->list);
9352
9353                 /* Sometimes we can find a backref before we find an actual
9354                  * extent, so we need to process it a little bit to see if there
9355                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9356                  * if this is a backref screwup.  If we need to delete stuff
9357                  * process_duplicates() will return 0, otherwise it will return
9358                  * 1 and we
9359                  */
9360                 if (process_duplicates(extent_cache, rec))
9361                         continue;
9362                 ret = delete_duplicate_records(root, rec);
9363                 if (ret < 0)
9364                         return ret;
9365                 /*
9366                  * delete_duplicate_records will return the number of entries
9367                  * deleted, so if it's greater than 0 then we know we actually
9368                  * did something and we need to remove.
9369                  */
9370                 if (ret)
9371                         had_dups = 1;
9372         }
9373
9374         if (had_dups)
9375                 return -EAGAIN;
9376
9377         while(1) {
9378                 int cur_err = 0;
9379                 int fix = 0;
9380
9381                 cache = search_cache_extent(extent_cache, 0);
9382                 if (!cache)
9383                         break;
9384                 rec = container_of(cache, struct extent_record, cache);
9385                 if (rec->num_duplicates) {
9386                         fprintf(stderr, "extent item %llu has multiple extent "
9387                                 "items\n", (unsigned long long)rec->start);
9388                         cur_err = 1;
9389                 }
9390
9391                 if (rec->refs != rec->extent_item_refs) {
9392                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9393                                 (unsigned long long)rec->start,
9394                                 (unsigned long long)rec->nr);
9395                         fprintf(stderr, "extent item %llu, found %llu\n",
9396                                 (unsigned long long)rec->extent_item_refs,
9397                                 (unsigned long long)rec->refs);
9398                         ret = record_orphan_data_extents(root->fs_info, rec);
9399                         if (ret < 0)
9400                                 goto repair_abort;
9401                         fix = ret;
9402                         cur_err = 1;
9403                 }
9404                 if (all_backpointers_checked(rec, 1)) {
9405                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9406                                 (unsigned long long)rec->start,
9407                                 (unsigned long long)rec->nr);
9408                         fix = 1;
9409                         cur_err = 1;
9410                 }
9411                 if (!rec->owner_ref_checked) {
9412                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9413                                 (unsigned long long)rec->start,
9414                                 (unsigned long long)rec->nr);
9415                         fix = 1;
9416                         cur_err = 1;
9417                 }
9418
9419                 if (repair && fix) {
9420                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9421                         if (ret)
9422                                 goto repair_abort;
9423                 }
9424
9425
9426                 if (rec->bad_full_backref) {
9427                         fprintf(stderr, "bad full backref, on [%llu]\n",
9428                                 (unsigned long long)rec->start);
9429                         if (repair) {
9430                                 ret = fixup_extent_flags(root->fs_info, rec);
9431                                 if (ret)
9432                                         goto repair_abort;
9433                                 fix = 1;
9434                         }
9435                         cur_err = 1;
9436                 }
9437                 /*
9438                  * Although it's not a extent ref's problem, we reuse this
9439                  * routine for error reporting.
9440                  * No repair function yet.
9441                  */
9442                 if (rec->crossing_stripes) {
9443                         fprintf(stderr,
9444                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9445                                 rec->start, rec->start + rec->max_size);
9446                         cur_err = 1;
9447                 }
9448
9449                 if (rec->wrong_chunk_type) {
9450                         fprintf(stderr,
9451                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9452                                 rec->start, rec->start + rec->max_size);
9453                         cur_err = 1;
9454                 }
9455
9456                 remove_cache_extent(extent_cache, cache);
9457                 free_all_extent_backrefs(rec);
9458                 if (!init_extent_tree && repair && (!cur_err || fix))
9459                         clear_extent_dirty(root->fs_info->excluded_extents,
9460                                            rec->start,
9461                                            rec->start + rec->max_size - 1);
9462                 free(rec);
9463         }
9464 repair_abort:
9465         if (repair) {
9466                 if (ret && ret != -EAGAIN) {
9467                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9468                         exit(1);
9469                 } else if (!ret) {
9470                         struct btrfs_trans_handle *trans;
9471
9472                         root = root->fs_info->extent_root;
9473                         trans = btrfs_start_transaction(root, 1);
9474                         if (IS_ERR(trans)) {
9475                                 ret = PTR_ERR(trans);
9476                                 goto repair_abort;
9477                         }
9478
9479                         ret = btrfs_fix_block_accounting(trans, root);
9480                         if (ret)
9481                                 goto repair_abort;
9482                         ret = btrfs_commit_transaction(trans, root);
9483                         if (ret)
9484                                 goto repair_abort;
9485                 }
9486                 return ret;
9487         }
9488         return 0;
9489 }
9490
9491 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9492 {
9493         u64 stripe_size;
9494
9495         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9496                 stripe_size = length;
9497                 stripe_size /= num_stripes;
9498         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9499                 stripe_size = length * 2;
9500                 stripe_size /= num_stripes;
9501         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9502                 stripe_size = length;
9503                 stripe_size /= (num_stripes - 1);
9504         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9505                 stripe_size = length;
9506                 stripe_size /= (num_stripes - 2);
9507         } else {
9508                 stripe_size = length;
9509         }
9510         return stripe_size;
9511 }
9512
9513 /*
9514  * Check the chunk with its block group/dev list ref:
9515  * Return 0 if all refs seems valid.
9516  * Return 1 if part of refs seems valid, need later check for rebuild ref
9517  * like missing block group and needs to search extent tree to rebuild them.
9518  * Return -1 if essential refs are missing and unable to rebuild.
9519  */
9520 static int check_chunk_refs(struct chunk_record *chunk_rec,
9521                             struct block_group_tree *block_group_cache,
9522                             struct device_extent_tree *dev_extent_cache,
9523                             int silent)
9524 {
9525         struct cache_extent *block_group_item;
9526         struct block_group_record *block_group_rec;
9527         struct cache_extent *dev_extent_item;
9528         struct device_extent_record *dev_extent_rec;
9529         u64 devid;
9530         u64 offset;
9531         u64 length;
9532         int metadump_v2 = 0;
9533         int i;
9534         int ret = 0;
9535
9536         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9537                                                chunk_rec->offset,
9538                                                chunk_rec->length);
9539         if (block_group_item) {
9540                 block_group_rec = container_of(block_group_item,
9541                                                struct block_group_record,
9542                                                cache);
9543                 if (chunk_rec->length != block_group_rec->offset ||
9544                     chunk_rec->offset != block_group_rec->objectid ||
9545                     (!metadump_v2 &&
9546                      chunk_rec->type_flags != block_group_rec->flags)) {
9547                         if (!silent)
9548                                 fprintf(stderr,
9549                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9550                                         chunk_rec->objectid,
9551                                         chunk_rec->type,
9552                                         chunk_rec->offset,
9553                                         chunk_rec->length,
9554                                         chunk_rec->offset,
9555                                         chunk_rec->type_flags,
9556                                         block_group_rec->objectid,
9557                                         block_group_rec->type,
9558                                         block_group_rec->offset,
9559                                         block_group_rec->offset,
9560                                         block_group_rec->objectid,
9561                                         block_group_rec->flags);
9562                         ret = -1;
9563                 } else {
9564                         list_del_init(&block_group_rec->list);
9565                         chunk_rec->bg_rec = block_group_rec;
9566                 }
9567         } else {
9568                 if (!silent)
9569                         fprintf(stderr,
9570                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9571                                 chunk_rec->objectid,
9572                                 chunk_rec->type,
9573                                 chunk_rec->offset,
9574                                 chunk_rec->length,
9575                                 chunk_rec->offset,
9576                                 chunk_rec->type_flags);
9577                 ret = 1;
9578         }
9579
9580         if (metadump_v2)
9581                 return ret;
9582
9583         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9584                                     chunk_rec->num_stripes);
9585         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9586                 devid = chunk_rec->stripes[i].devid;
9587                 offset = chunk_rec->stripes[i].offset;
9588                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9589                                                        devid, offset, length);
9590                 if (dev_extent_item) {
9591                         dev_extent_rec = container_of(dev_extent_item,
9592                                                 struct device_extent_record,
9593                                                 cache);
9594                         if (dev_extent_rec->objectid != devid ||
9595                             dev_extent_rec->offset != offset ||
9596                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9597                             dev_extent_rec->length != length) {
9598                                 if (!silent)
9599                                         fprintf(stderr,
9600                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9601                                                 chunk_rec->objectid,
9602                                                 chunk_rec->type,
9603                                                 chunk_rec->offset,
9604                                                 chunk_rec->stripes[i].devid,
9605                                                 chunk_rec->stripes[i].offset,
9606                                                 dev_extent_rec->objectid,
9607                                                 dev_extent_rec->offset,
9608                                                 dev_extent_rec->length);
9609                                 ret = -1;
9610                         } else {
9611                                 list_move(&dev_extent_rec->chunk_list,
9612                                           &chunk_rec->dextents);
9613                         }
9614                 } else {
9615                         if (!silent)
9616                                 fprintf(stderr,
9617                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9618                                         chunk_rec->objectid,
9619                                         chunk_rec->type,
9620                                         chunk_rec->offset,
9621                                         chunk_rec->stripes[i].devid,
9622                                         chunk_rec->stripes[i].offset);
9623                         ret = -1;
9624                 }
9625         }
9626         return ret;
9627 }
9628
9629 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9630 int check_chunks(struct cache_tree *chunk_cache,
9631                  struct block_group_tree *block_group_cache,
9632                  struct device_extent_tree *dev_extent_cache,
9633                  struct list_head *good, struct list_head *bad,
9634                  struct list_head *rebuild, int silent)
9635 {
9636         struct cache_extent *chunk_item;
9637         struct chunk_record *chunk_rec;
9638         struct block_group_record *bg_rec;
9639         struct device_extent_record *dext_rec;
9640         int err;
9641         int ret = 0;
9642
9643         chunk_item = first_cache_extent(chunk_cache);
9644         while (chunk_item) {
9645                 chunk_rec = container_of(chunk_item, struct chunk_record,
9646                                          cache);
9647                 err = check_chunk_refs(chunk_rec, block_group_cache,
9648                                        dev_extent_cache, silent);
9649                 if (err < 0)
9650                         ret = err;
9651                 if (err == 0 && good)
9652                         list_add_tail(&chunk_rec->list, good);
9653                 if (err > 0 && rebuild)
9654                         list_add_tail(&chunk_rec->list, rebuild);
9655                 if (err < 0 && bad)
9656                         list_add_tail(&chunk_rec->list, bad);
9657                 chunk_item = next_cache_extent(chunk_item);
9658         }
9659
9660         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9661                 if (!silent)
9662                         fprintf(stderr,
9663                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9664                                 bg_rec->objectid,
9665                                 bg_rec->offset,
9666                                 bg_rec->flags);
9667                 if (!ret)
9668                         ret = 1;
9669         }
9670
9671         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9672                             chunk_list) {
9673                 if (!silent)
9674                         fprintf(stderr,
9675                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9676                                 dext_rec->objectid,
9677                                 dext_rec->offset,
9678                                 dext_rec->length);
9679                 if (!ret)
9680                         ret = 1;
9681         }
9682         return ret;
9683 }
9684
9685
9686 static int check_device_used(struct device_record *dev_rec,
9687                              struct device_extent_tree *dext_cache)
9688 {
9689         struct cache_extent *cache;
9690         struct device_extent_record *dev_extent_rec;
9691         u64 total_byte = 0;
9692
9693         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9694         while (cache) {
9695                 dev_extent_rec = container_of(cache,
9696                                               struct device_extent_record,
9697                                               cache);
9698                 if (dev_extent_rec->objectid != dev_rec->devid)
9699                         break;
9700
9701                 list_del_init(&dev_extent_rec->device_list);
9702                 total_byte += dev_extent_rec->length;
9703                 cache = next_cache_extent(cache);
9704         }
9705
9706         if (total_byte != dev_rec->byte_used) {
9707                 fprintf(stderr,
9708                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9709                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9710                         dev_rec->type, dev_rec->offset);
9711                 return -1;
9712         } else {
9713                 return 0;
9714         }
9715 }
9716
9717 /* check btrfs_dev_item -> btrfs_dev_extent */
9718 static int check_devices(struct rb_root *dev_cache,
9719                          struct device_extent_tree *dev_extent_cache)
9720 {
9721         struct rb_node *dev_node;
9722         struct device_record *dev_rec;
9723         struct device_extent_record *dext_rec;
9724         int err;
9725         int ret = 0;
9726
9727         dev_node = rb_first(dev_cache);
9728         while (dev_node) {
9729                 dev_rec = container_of(dev_node, struct device_record, node);
9730                 err = check_device_used(dev_rec, dev_extent_cache);
9731                 if (err)
9732                         ret = err;
9733
9734                 dev_node = rb_next(dev_node);
9735         }
9736         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9737                             device_list) {
9738                 fprintf(stderr,
9739                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9740                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9741                 if (!ret)
9742                         ret = 1;
9743         }
9744         return ret;
9745 }
9746
9747 static int add_root_item_to_list(struct list_head *head,
9748                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9749                                   u8 level, u8 drop_level,
9750                                   struct btrfs_key *drop_key)
9751 {
9752
9753         struct root_item_record *ri_rec;
9754         ri_rec = malloc(sizeof(*ri_rec));
9755         if (!ri_rec)
9756                 return -ENOMEM;
9757         ri_rec->bytenr = bytenr;
9758         ri_rec->objectid = objectid;
9759         ri_rec->level = level;
9760         ri_rec->drop_level = drop_level;
9761         ri_rec->last_snapshot = last_snapshot;
9762         if (drop_key)
9763                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9764         list_add_tail(&ri_rec->list, head);
9765
9766         return 0;
9767 }
9768
9769 static void free_root_item_list(struct list_head *list)
9770 {
9771         struct root_item_record *ri_rec;
9772
9773         while (!list_empty(list)) {
9774                 ri_rec = list_first_entry(list, struct root_item_record,
9775                                           list);
9776                 list_del_init(&ri_rec->list);
9777                 free(ri_rec);
9778         }
9779 }
9780
9781 static int deal_root_from_list(struct list_head *list,
9782                                struct btrfs_root *root,
9783                                struct block_info *bits,
9784                                int bits_nr,
9785                                struct cache_tree *pending,
9786                                struct cache_tree *seen,
9787                                struct cache_tree *reada,
9788                                struct cache_tree *nodes,
9789                                struct cache_tree *extent_cache,
9790                                struct cache_tree *chunk_cache,
9791                                struct rb_root *dev_cache,
9792                                struct block_group_tree *block_group_cache,
9793                                struct device_extent_tree *dev_extent_cache)
9794 {
9795         int ret = 0;
9796         u64 last;
9797
9798         while (!list_empty(list)) {
9799                 struct root_item_record *rec;
9800                 struct extent_buffer *buf;
9801                 rec = list_entry(list->next,
9802                                  struct root_item_record, list);
9803                 last = 0;
9804                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9805                 if (!extent_buffer_uptodate(buf)) {
9806                         free_extent_buffer(buf);
9807                         ret = -EIO;
9808                         break;
9809                 }
9810                 ret = add_root_to_pending(buf, extent_cache, pending,
9811                                     seen, nodes, rec->objectid);
9812                 if (ret < 0)
9813                         break;
9814                 /*
9815                  * To rebuild extent tree, we need deal with snapshot
9816                  * one by one, otherwise we deal with node firstly which
9817                  * can maximize readahead.
9818                  */
9819                 while (1) {
9820                         ret = run_next_block(root, bits, bits_nr, &last,
9821                                              pending, seen, reada, nodes,
9822                                              extent_cache, chunk_cache,
9823                                              dev_cache, block_group_cache,
9824                                              dev_extent_cache, rec);
9825                         if (ret != 0)
9826                                 break;
9827                 }
9828                 free_extent_buffer(buf);
9829                 list_del(&rec->list);
9830                 free(rec);
9831                 if (ret < 0)
9832                         break;
9833         }
9834         while (ret >= 0) {
9835                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9836                                      reada, nodes, extent_cache, chunk_cache,
9837                                      dev_cache, block_group_cache,
9838                                      dev_extent_cache, NULL);
9839                 if (ret != 0) {
9840                         if (ret > 0)
9841                                 ret = 0;
9842                         break;
9843                 }
9844         }
9845         return ret;
9846 }
9847
9848 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
9849 {
9850         struct rb_root dev_cache;
9851         struct cache_tree chunk_cache;
9852         struct block_group_tree block_group_cache;
9853         struct device_extent_tree dev_extent_cache;
9854         struct cache_tree extent_cache;
9855         struct cache_tree seen;
9856         struct cache_tree pending;
9857         struct cache_tree reada;
9858         struct cache_tree nodes;
9859         struct extent_io_tree excluded_extents;
9860         struct cache_tree corrupt_blocks;
9861         struct btrfs_path path;
9862         struct btrfs_key key;
9863         struct btrfs_key found_key;
9864         int ret, err = 0;
9865         struct block_info *bits;
9866         int bits_nr;
9867         struct extent_buffer *leaf;
9868         int slot;
9869         struct btrfs_root_item ri;
9870         struct list_head dropping_trees;
9871         struct list_head normal_trees;
9872         struct btrfs_root *root1;
9873         struct btrfs_root *root;
9874         u64 objectid;
9875         u8 level;
9876
9877         root = fs_info->fs_root;
9878         dev_cache = RB_ROOT;
9879         cache_tree_init(&chunk_cache);
9880         block_group_tree_init(&block_group_cache);
9881         device_extent_tree_init(&dev_extent_cache);
9882
9883         cache_tree_init(&extent_cache);
9884         cache_tree_init(&seen);
9885         cache_tree_init(&pending);
9886         cache_tree_init(&nodes);
9887         cache_tree_init(&reada);
9888         cache_tree_init(&corrupt_blocks);
9889         extent_io_tree_init(&excluded_extents);
9890         INIT_LIST_HEAD(&dropping_trees);
9891         INIT_LIST_HEAD(&normal_trees);
9892
9893         if (repair) {
9894                 fs_info->excluded_extents = &excluded_extents;
9895                 fs_info->fsck_extent_cache = &extent_cache;
9896                 fs_info->free_extent_hook = free_extent_hook;
9897                 fs_info->corrupt_blocks = &corrupt_blocks;
9898         }
9899
9900         bits_nr = 1024;
9901         bits = malloc(bits_nr * sizeof(struct block_info));
9902         if (!bits) {
9903                 perror("malloc");
9904                 exit(1);
9905         }
9906
9907         if (ctx.progress_enabled) {
9908                 ctx.tp = TASK_EXTENTS;
9909                 task_start(ctx.info);
9910         }
9911
9912 again:
9913         root1 = fs_info->tree_root;
9914         level = btrfs_header_level(root1->node);
9915         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9916                                     root1->node->start, 0, level, 0, NULL);
9917         if (ret < 0)
9918                 goto out;
9919         root1 = fs_info->chunk_root;
9920         level = btrfs_header_level(root1->node);
9921         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9922                                     root1->node->start, 0, level, 0, NULL);
9923         if (ret < 0)
9924                 goto out;
9925         btrfs_init_path(&path);
9926         key.offset = 0;
9927         key.objectid = 0;
9928         key.type = BTRFS_ROOT_ITEM_KEY;
9929         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
9930         if (ret < 0)
9931                 goto out;
9932         while(1) {
9933                 leaf = path.nodes[0];
9934                 slot = path.slots[0];
9935                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9936                         ret = btrfs_next_leaf(root, &path);
9937                         if (ret != 0)
9938                                 break;
9939                         leaf = path.nodes[0];
9940                         slot = path.slots[0];
9941                 }
9942                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9943                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9944                         unsigned long offset;
9945                         u64 last_snapshot;
9946
9947                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9948                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9949                         last_snapshot = btrfs_root_last_snapshot(&ri);
9950                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9951                                 level = btrfs_root_level(&ri);
9952                                 ret = add_root_item_to_list(&normal_trees,
9953                                                 found_key.objectid,
9954                                                 btrfs_root_bytenr(&ri),
9955                                                 last_snapshot, level,
9956                                                 0, NULL);
9957                                 if (ret < 0)
9958                                         goto out;
9959                         } else {
9960                                 level = btrfs_root_level(&ri);
9961                                 objectid = found_key.objectid;
9962                                 btrfs_disk_key_to_cpu(&found_key,
9963                                                       &ri.drop_progress);
9964                                 ret = add_root_item_to_list(&dropping_trees,
9965                                                 objectid,
9966                                                 btrfs_root_bytenr(&ri),
9967                                                 last_snapshot, level,
9968                                                 ri.drop_level, &found_key);
9969                                 if (ret < 0)
9970                                         goto out;
9971                         }
9972                 }
9973                 path.slots[0]++;
9974         }
9975         btrfs_release_path(&path);
9976
9977         /*
9978          * check_block can return -EAGAIN if it fixes something, please keep
9979          * this in mind when dealing with return values from these functions, if
9980          * we get -EAGAIN we want to fall through and restart the loop.
9981          */
9982         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9983                                   &seen, &reada, &nodes, &extent_cache,
9984                                   &chunk_cache, &dev_cache, &block_group_cache,
9985                                   &dev_extent_cache);
9986         if (ret < 0) {
9987                 if (ret == -EAGAIN)
9988                         goto loop;
9989                 goto out;
9990         }
9991         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9992                                   &pending, &seen, &reada, &nodes,
9993                                   &extent_cache, &chunk_cache, &dev_cache,
9994                                   &block_group_cache, &dev_extent_cache);
9995         if (ret < 0) {
9996                 if (ret == -EAGAIN)
9997                         goto loop;
9998                 goto out;
9999         }
10000
10001         ret = check_chunks(&chunk_cache, &block_group_cache,
10002                            &dev_extent_cache, NULL, NULL, NULL, 0);
10003         if (ret) {
10004                 if (ret == -EAGAIN)
10005                         goto loop;
10006                 err = ret;
10007         }
10008
10009         ret = check_extent_refs(root, &extent_cache);
10010         if (ret < 0) {
10011                 if (ret == -EAGAIN)
10012                         goto loop;
10013                 goto out;
10014         }
10015
10016         ret = check_devices(&dev_cache, &dev_extent_cache);
10017         if (ret && err)
10018                 ret = err;
10019
10020 out:
10021         task_stop(ctx.info);
10022         if (repair) {
10023                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10024                 extent_io_tree_cleanup(&excluded_extents);
10025                 fs_info->fsck_extent_cache = NULL;
10026                 fs_info->free_extent_hook = NULL;
10027                 fs_info->corrupt_blocks = NULL;
10028                 fs_info->excluded_extents = NULL;
10029         }
10030         free(bits);
10031         free_chunk_cache_tree(&chunk_cache);
10032         free_device_cache_tree(&dev_cache);
10033         free_block_group_tree(&block_group_cache);
10034         free_device_extent_tree(&dev_extent_cache);
10035         free_extent_cache_tree(&seen);
10036         free_extent_cache_tree(&pending);
10037         free_extent_cache_tree(&reada);
10038         free_extent_cache_tree(&nodes);
10039         free_root_item_list(&normal_trees);
10040         free_root_item_list(&dropping_trees);
10041         return ret;
10042 loop:
10043         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10044         free_extent_cache_tree(&seen);
10045         free_extent_cache_tree(&pending);
10046         free_extent_cache_tree(&reada);
10047         free_extent_cache_tree(&nodes);
10048         free_chunk_cache_tree(&chunk_cache);
10049         free_block_group_tree(&block_group_cache);
10050         free_device_cache_tree(&dev_cache);
10051         free_device_extent_tree(&dev_extent_cache);
10052         free_extent_record_cache(&extent_cache);
10053         free_root_item_list(&normal_trees);
10054         free_root_item_list(&dropping_trees);
10055         extent_io_tree_cleanup(&excluded_extents);
10056         goto again;
10057 }
10058
10059 /*
10060  * Check backrefs of a tree block given by @bytenr or @eb.
10061  *
10062  * @root:       the root containing the @bytenr or @eb
10063  * @eb:         tree block extent buffer, can be NULL
10064  * @bytenr:     bytenr of the tree block to search
10065  * @level:      tree level of the tree block
10066  * @owner:      owner of the tree block
10067  *
10068  * Return >0 for any error found and output error message
10069  * Return 0 for no error found
10070  */
10071 static int check_tree_block_ref(struct btrfs_root *root,
10072                                 struct extent_buffer *eb, u64 bytenr,
10073                                 int level, u64 owner)
10074 {
10075         struct btrfs_key key;
10076         struct btrfs_root *extent_root = root->fs_info->extent_root;
10077         struct btrfs_path path;
10078         struct btrfs_extent_item *ei;
10079         struct btrfs_extent_inline_ref *iref;
10080         struct extent_buffer *leaf;
10081         unsigned long end;
10082         unsigned long ptr;
10083         int slot;
10084         int skinny_level;
10085         int type;
10086         u32 nodesize = root->fs_info->nodesize;
10087         u32 item_size;
10088         u64 offset;
10089         int tree_reloc_root = 0;
10090         int found_ref = 0;
10091         int err = 0;
10092         int ret;
10093
10094         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10095             btrfs_header_bytenr(root->node) == bytenr)
10096                 tree_reloc_root = 1;
10097
10098         btrfs_init_path(&path);
10099         key.objectid = bytenr;
10100         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10101                 key.type = BTRFS_METADATA_ITEM_KEY;
10102         else
10103                 key.type = BTRFS_EXTENT_ITEM_KEY;
10104         key.offset = (u64)-1;
10105
10106         /* Search for the backref in extent tree */
10107         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10108         if (ret < 0) {
10109                 err |= BACKREF_MISSING;
10110                 goto out;
10111         }
10112         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10113         if (ret) {
10114                 err |= BACKREF_MISSING;
10115                 goto out;
10116         }
10117
10118         leaf = path.nodes[0];
10119         slot = path.slots[0];
10120         btrfs_item_key_to_cpu(leaf, &key, slot);
10121
10122         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10123
10124         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10125                 skinny_level = (int)key.offset;
10126                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10127         } else {
10128                 struct btrfs_tree_block_info *info;
10129
10130                 info = (struct btrfs_tree_block_info *)(ei + 1);
10131                 skinny_level = btrfs_tree_block_level(leaf, info);
10132                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10133         }
10134
10135         if (eb) {
10136                 u64 header_gen;
10137                 u64 extent_gen;
10138
10139                 if (!(btrfs_extent_flags(leaf, ei) &
10140                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10141                         error(
10142                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10143                                 key.objectid, nodesize,
10144                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10145                         err = BACKREF_MISMATCH;
10146                 }
10147                 header_gen = btrfs_header_generation(eb);
10148                 extent_gen = btrfs_extent_generation(leaf, ei);
10149                 if (header_gen != extent_gen) {
10150                         error(
10151         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10152                                 key.objectid, nodesize, header_gen,
10153                                 extent_gen);
10154                         err = BACKREF_MISMATCH;
10155                 }
10156                 if (level != skinny_level) {
10157                         error(
10158                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10159                                 key.objectid, nodesize, level, skinny_level);
10160                         err = BACKREF_MISMATCH;
10161                 }
10162                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10163                         error(
10164                         "extent[%llu %u] is referred by other roots than %llu",
10165                                 key.objectid, nodesize, root->objectid);
10166                         err = BACKREF_MISMATCH;
10167                 }
10168         }
10169
10170         /*
10171          * Iterate the extent/metadata item to find the exact backref
10172          */
10173         item_size = btrfs_item_size_nr(leaf, slot);
10174         ptr = (unsigned long)iref;
10175         end = (unsigned long)ei + item_size;
10176         while (ptr < end) {
10177                 iref = (struct btrfs_extent_inline_ref *)ptr;
10178                 type = btrfs_extent_inline_ref_type(leaf, iref);
10179                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10180
10181                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10182                         (offset == root->objectid || offset == owner)) {
10183                         found_ref = 1;
10184                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10185                         /*
10186                          * Backref of tree reloc root points to itself, no need
10187                          * to check backref any more.
10188                          */
10189                         if (tree_reloc_root)
10190                                 found_ref = 1;
10191                         else
10192                         /* Check if the backref points to valid referencer */
10193                                 found_ref = !check_tree_block_ref(root, NULL,
10194                                                 offset, level + 1, owner);
10195                 }
10196
10197                 if (found_ref)
10198                         break;
10199                 ptr += btrfs_extent_inline_ref_size(type);
10200         }
10201
10202         /*
10203          * Inlined extent item doesn't have what we need, check
10204          * TREE_BLOCK_REF_KEY
10205          */
10206         if (!found_ref) {
10207                 btrfs_release_path(&path);
10208                 key.objectid = bytenr;
10209                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10210                 key.offset = root->objectid;
10211
10212                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10213                 if (!ret)
10214                         found_ref = 1;
10215         }
10216         if (!found_ref)
10217                 err |= BACKREF_MISSING;
10218 out:
10219         btrfs_release_path(&path);
10220         if (eb && (err & BACKREF_MISSING))
10221                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10222                         bytenr, nodesize, owner, level);
10223         return err;
10224 }
10225
10226 /*
10227  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10228  *
10229  * Return >0 any error found and output error message
10230  * Return 0 for no error found
10231  */
10232 static int check_extent_data_item(struct btrfs_root *root,
10233                                   struct extent_buffer *eb, int slot)
10234 {
10235         struct btrfs_file_extent_item *fi;
10236         struct btrfs_path path;
10237         struct btrfs_root *extent_root = root->fs_info->extent_root;
10238         struct btrfs_key fi_key;
10239         struct btrfs_key dbref_key;
10240         struct extent_buffer *leaf;
10241         struct btrfs_extent_item *ei;
10242         struct btrfs_extent_inline_ref *iref;
10243         struct btrfs_extent_data_ref *dref;
10244         u64 owner;
10245         u64 disk_bytenr;
10246         u64 disk_num_bytes;
10247         u64 extent_num_bytes;
10248         u64 extent_flags;
10249         u32 item_size;
10250         unsigned long end;
10251         unsigned long ptr;
10252         int type;
10253         u64 ref_root;
10254         int found_dbackref = 0;
10255         int err = 0;
10256         int ret;
10257
10258         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10259         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10260
10261         /* Nothing to check for hole and inline data extents */
10262         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10263             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10264                 return 0;
10265
10266         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10267         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10268         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10269
10270         /* Check unaligned disk_num_bytes and num_bytes */
10271         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10272                 error(
10273 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10274                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10275                         root->fs_info->sectorsize);
10276                 err |= BYTES_UNALIGNED;
10277         } else {
10278                 data_bytes_allocated += disk_num_bytes;
10279         }
10280         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10281                 error(
10282 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10283                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10284                         root->fs_info->sectorsize);
10285                 err |= BYTES_UNALIGNED;
10286         } else {
10287                 data_bytes_referenced += extent_num_bytes;
10288         }
10289         owner = btrfs_header_owner(eb);
10290
10291         /* Check the extent item of the file extent in extent tree */
10292         btrfs_init_path(&path);
10293         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10294         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10295         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10296
10297         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10298         if (ret)
10299                 goto out;
10300
10301         leaf = path.nodes[0];
10302         slot = path.slots[0];
10303         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10304
10305         extent_flags = btrfs_extent_flags(leaf, ei);
10306
10307         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10308                 error(
10309                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10310                     disk_bytenr, disk_num_bytes,
10311                     BTRFS_EXTENT_FLAG_DATA);
10312                 err |= BACKREF_MISMATCH;
10313         }
10314
10315         /* Check data backref inside that extent item */
10316         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10317         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10318         ptr = (unsigned long)iref;
10319         end = (unsigned long)ei + item_size;
10320         while (ptr < end) {
10321                 iref = (struct btrfs_extent_inline_ref *)ptr;
10322                 type = btrfs_extent_inline_ref_type(leaf, iref);
10323                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10324
10325                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10326                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10327                         if (ref_root == owner || ref_root == root->objectid)
10328                                 found_dbackref = 1;
10329                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10330                         found_dbackref = !check_tree_block_ref(root, NULL,
10331                                 btrfs_extent_inline_ref_offset(leaf, iref),
10332                                 0, owner);
10333                 }
10334
10335                 if (found_dbackref)
10336                         break;
10337                 ptr += btrfs_extent_inline_ref_size(type);
10338         }
10339
10340         if (!found_dbackref) {
10341                 btrfs_release_path(&path);
10342
10343                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10344                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10345                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10346                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10347                                 fi_key.objectid, fi_key.offset);
10348
10349                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10350                                         &dbref_key, &path, 0, 0);
10351                 if (!ret) {
10352                         found_dbackref = 1;
10353                         goto out;
10354                 }
10355
10356                 btrfs_release_path(&path);
10357
10358                 /*
10359                  * Neither inlined nor EXTENT_DATA_REF found, try
10360                  * SHARED_DATA_REF as last chance.
10361                  */
10362                 dbref_key.objectid = disk_bytenr;
10363                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10364                 dbref_key.offset = eb->start;
10365
10366                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10367                                         &dbref_key, &path, 0, 0);
10368                 if (!ret) {
10369                         found_dbackref = 1;
10370                         goto out;
10371                 }
10372         }
10373
10374 out:
10375         if (!found_dbackref)
10376                 err |= BACKREF_MISSING;
10377         btrfs_release_path(&path);
10378         if (err & BACKREF_MISSING) {
10379                 error("data extent[%llu %llu] backref lost",
10380                       disk_bytenr, disk_num_bytes);
10381         }
10382         return err;
10383 }
10384
10385 /*
10386  * Get real tree block level for the case like shared block
10387  * Return >= 0 as tree level
10388  * Return <0 for error
10389  */
10390 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10391 {
10392         struct extent_buffer *eb;
10393         struct btrfs_path path;
10394         struct btrfs_key key;
10395         struct btrfs_extent_item *ei;
10396         u64 flags;
10397         u64 transid;
10398         u8 backref_level;
10399         u8 header_level;
10400         int ret;
10401
10402         /* Search extent tree for extent generation and level */
10403         key.objectid = bytenr;
10404         key.type = BTRFS_METADATA_ITEM_KEY;
10405         key.offset = (u64)-1;
10406
10407         btrfs_init_path(&path);
10408         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10409         if (ret < 0)
10410                 goto release_out;
10411         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10412         if (ret < 0)
10413                 goto release_out;
10414         if (ret > 0) {
10415                 ret = -ENOENT;
10416                 goto release_out;
10417         }
10418
10419         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10420         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10421                             struct btrfs_extent_item);
10422         flags = btrfs_extent_flags(path.nodes[0], ei);
10423         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10424                 ret = -ENOENT;
10425                 goto release_out;
10426         }
10427
10428         /* Get transid for later read_tree_block() check */
10429         transid = btrfs_extent_generation(path.nodes[0], ei);
10430
10431         /* Get backref level as one source */
10432         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10433                 backref_level = key.offset;
10434         } else {
10435                 struct btrfs_tree_block_info *info;
10436
10437                 info = (struct btrfs_tree_block_info *)(ei + 1);
10438                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10439         }
10440         btrfs_release_path(&path);
10441
10442         /* Get level from tree block as an alternative source */
10443         eb = read_tree_block(fs_info, bytenr, transid);
10444         if (!extent_buffer_uptodate(eb)) {
10445                 free_extent_buffer(eb);
10446                 return -EIO;
10447         }
10448         header_level = btrfs_header_level(eb);
10449         free_extent_buffer(eb);
10450
10451         if (header_level != backref_level)
10452                 return -EIO;
10453         return header_level;
10454
10455 release_out:
10456         btrfs_release_path(&path);
10457         return ret;
10458 }
10459
10460 /*
10461  * Check if a tree block backref is valid (points to a valid tree block)
10462  * if level == -1, level will be resolved
10463  * Return >0 for any error found and print error message
10464  */
10465 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10466                                     u64 bytenr, int level)
10467 {
10468         struct btrfs_root *root;
10469         struct btrfs_key key;
10470         struct btrfs_path path;
10471         struct extent_buffer *eb;
10472         struct extent_buffer *node;
10473         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10474         int err = 0;
10475         int ret;
10476
10477         /* Query level for level == -1 special case */
10478         if (level == -1)
10479                 level = query_tree_block_level(fs_info, bytenr);
10480         if (level < 0) {
10481                 err |= REFERENCER_MISSING;
10482                 goto out;
10483         }
10484
10485         key.objectid = root_id;
10486         key.type = BTRFS_ROOT_ITEM_KEY;
10487         key.offset = (u64)-1;
10488
10489         root = btrfs_read_fs_root(fs_info, &key);
10490         if (IS_ERR(root)) {
10491                 err |= REFERENCER_MISSING;
10492                 goto out;
10493         }
10494
10495         /* Read out the tree block to get item/node key */
10496         eb = read_tree_block(fs_info, bytenr, 0);
10497         if (!extent_buffer_uptodate(eb)) {
10498                 err |= REFERENCER_MISSING;
10499                 free_extent_buffer(eb);
10500                 goto out;
10501         }
10502
10503         /* Empty tree, no need to check key */
10504         if (!btrfs_header_nritems(eb) && !level) {
10505                 free_extent_buffer(eb);
10506                 goto out;
10507         }
10508
10509         if (level)
10510                 btrfs_node_key_to_cpu(eb, &key, 0);
10511         else
10512                 btrfs_item_key_to_cpu(eb, &key, 0);
10513
10514         free_extent_buffer(eb);
10515
10516         btrfs_init_path(&path);
10517         path.lowest_level = level;
10518         /* Search with the first key, to ensure we can reach it */
10519         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10520         if (ret < 0) {
10521                 err |= REFERENCER_MISSING;
10522                 goto release_out;
10523         }
10524
10525         node = path.nodes[level];
10526         if (btrfs_header_bytenr(node) != bytenr) {
10527                 error(
10528         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10529                         bytenr, nodesize, bytenr,
10530                         btrfs_header_bytenr(node));
10531                 err |= REFERENCER_MISMATCH;
10532         }
10533         if (btrfs_header_level(node) != level) {
10534                 error(
10535         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10536                         bytenr, nodesize, level,
10537                         btrfs_header_level(node));
10538                 err |= REFERENCER_MISMATCH;
10539         }
10540
10541 release_out:
10542         btrfs_release_path(&path);
10543 out:
10544         if (err & REFERENCER_MISSING) {
10545                 if (level < 0)
10546                         error("extent [%llu %d] lost referencer (owner: %llu)",
10547                                 bytenr, nodesize, root_id);
10548                 else
10549                         error(
10550                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10551                                 bytenr, nodesize, root_id, level);
10552         }
10553
10554         return err;
10555 }
10556
10557 /*
10558  * Check if tree block @eb is tree reloc root.
10559  * Return 0 if it's not or any problem happens
10560  * Return 1 if it's a tree reloc root
10561  */
10562 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10563                                  struct extent_buffer *eb)
10564 {
10565         struct btrfs_root *tree_reloc_root;
10566         struct btrfs_key key;
10567         u64 bytenr = btrfs_header_bytenr(eb);
10568         u64 owner = btrfs_header_owner(eb);
10569         int ret = 0;
10570
10571         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10572         key.offset = owner;
10573         key.type = BTRFS_ROOT_ITEM_KEY;
10574
10575         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10576         if (IS_ERR(tree_reloc_root))
10577                 return 0;
10578
10579         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10580                 ret = 1;
10581         btrfs_free_fs_root(tree_reloc_root);
10582         return ret;
10583 }
10584
10585 /*
10586  * Check referencer for shared block backref
10587  * If level == -1, this function will resolve the level.
10588  */
10589 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10590                                      u64 parent, u64 bytenr, int level)
10591 {
10592         struct extent_buffer *eb;
10593         u32 nr;
10594         int found_parent = 0;
10595         int i;
10596
10597         eb = read_tree_block(fs_info, parent, 0);
10598         if (!extent_buffer_uptodate(eb))
10599                 goto out;
10600
10601         if (level == -1)
10602                 level = query_tree_block_level(fs_info, bytenr);
10603         if (level < 0)
10604                 goto out;
10605
10606         /* It's possible it's a tree reloc root */
10607         if (parent == bytenr) {
10608                 if (is_tree_reloc_root(fs_info, eb))
10609                         found_parent = 1;
10610                 goto out;
10611         }
10612
10613         if (level + 1 != btrfs_header_level(eb))
10614                 goto out;
10615
10616         nr = btrfs_header_nritems(eb);
10617         for (i = 0; i < nr; i++) {
10618                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10619                         found_parent = 1;
10620                         break;
10621                 }
10622         }
10623 out:
10624         free_extent_buffer(eb);
10625         if (!found_parent) {
10626                 error(
10627         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10628                         bytenr, fs_info->nodesize, parent, level);
10629                 return REFERENCER_MISSING;
10630         }
10631         return 0;
10632 }
10633
10634 /*
10635  * Check referencer for normal (inlined) data ref
10636  * If len == 0, it will be resolved by searching in extent tree
10637  */
10638 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10639                                      u64 root_id, u64 objectid, u64 offset,
10640                                      u64 bytenr, u64 len, u32 count)
10641 {
10642         struct btrfs_root *root;
10643         struct btrfs_root *extent_root = fs_info->extent_root;
10644         struct btrfs_key key;
10645         struct btrfs_path path;
10646         struct extent_buffer *leaf;
10647         struct btrfs_file_extent_item *fi;
10648         u32 found_count = 0;
10649         int slot;
10650         int ret = 0;
10651
10652         if (!len) {
10653                 key.objectid = bytenr;
10654                 key.type = BTRFS_EXTENT_ITEM_KEY;
10655                 key.offset = (u64)-1;
10656
10657                 btrfs_init_path(&path);
10658                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10659                 if (ret < 0)
10660                         goto out;
10661                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10662                 if (ret)
10663                         goto out;
10664                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10665                 if (key.objectid != bytenr ||
10666                     key.type != BTRFS_EXTENT_ITEM_KEY)
10667                         goto out;
10668                 len = key.offset;
10669                 btrfs_release_path(&path);
10670         }
10671         key.objectid = root_id;
10672         key.type = BTRFS_ROOT_ITEM_KEY;
10673         key.offset = (u64)-1;
10674         btrfs_init_path(&path);
10675
10676         root = btrfs_read_fs_root(fs_info, &key);
10677         if (IS_ERR(root))
10678                 goto out;
10679
10680         key.objectid = objectid;
10681         key.type = BTRFS_EXTENT_DATA_KEY;
10682         /*
10683          * It can be nasty as data backref offset is
10684          * file offset - file extent offset, which is smaller or
10685          * equal to original backref offset.  The only special case is
10686          * overflow.  So we need to special check and do further search.
10687          */
10688         key.offset = offset & (1ULL << 63) ? 0 : offset;
10689
10690         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10691         if (ret < 0)
10692                 goto out;
10693
10694         /*
10695          * Search afterwards to get correct one
10696          * NOTE: As we must do a comprehensive check on the data backref to
10697          * make sure the dref count also matches, we must iterate all file
10698          * extents for that inode.
10699          */
10700         while (1) {
10701                 leaf = path.nodes[0];
10702                 slot = path.slots[0];
10703
10704                 if (slot >= btrfs_header_nritems(leaf))
10705                         goto next;
10706                 btrfs_item_key_to_cpu(leaf, &key, slot);
10707                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10708                         break;
10709                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10710                 /*
10711                  * Except normal disk bytenr and disk num bytes, we still
10712                  * need to do extra check on dbackref offset as
10713                  * dbackref offset = file_offset - file_extent_offset
10714                  */
10715                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10716                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10717                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10718                     offset)
10719                         found_count++;
10720
10721 next:
10722                 ret = btrfs_next_item(root, &path);
10723                 if (ret)
10724                         break;
10725         }
10726 out:
10727         btrfs_release_path(&path);
10728         if (found_count != count) {
10729                 error(
10730 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10731                         bytenr, len, root_id, objectid, offset, count, found_count);
10732                 return REFERENCER_MISSING;
10733         }
10734         return 0;
10735 }
10736
10737 /*
10738  * Check if the referencer of a shared data backref exists
10739  */
10740 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10741                                      u64 parent, u64 bytenr)
10742 {
10743         struct extent_buffer *eb;
10744         struct btrfs_key key;
10745         struct btrfs_file_extent_item *fi;
10746         u32 nr;
10747         int found_parent = 0;
10748         int i;
10749
10750         eb = read_tree_block(fs_info, parent, 0);
10751         if (!extent_buffer_uptodate(eb))
10752                 goto out;
10753
10754         nr = btrfs_header_nritems(eb);
10755         for (i = 0; i < nr; i++) {
10756                 btrfs_item_key_to_cpu(eb, &key, i);
10757                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10758                         continue;
10759
10760                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10761                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10762                         continue;
10763
10764                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10765                         found_parent = 1;
10766                         break;
10767                 }
10768         }
10769
10770 out:
10771         free_extent_buffer(eb);
10772         if (!found_parent) {
10773                 error("shared extent %llu referencer lost (parent: %llu)",
10774                         bytenr, parent);
10775                 return REFERENCER_MISSING;
10776         }
10777         return 0;
10778 }
10779
10780 /*
10781  * This function will check a given extent item, including its backref and
10782  * itself (like crossing stripe boundary and type)
10783  *
10784  * Since we don't use extent_record anymore, introduce new error bit
10785  */
10786 static int check_extent_item(struct btrfs_fs_info *fs_info,
10787                              struct extent_buffer *eb, int slot)
10788 {
10789         struct btrfs_extent_item *ei;
10790         struct btrfs_extent_inline_ref *iref;
10791         struct btrfs_extent_data_ref *dref;
10792         unsigned long end;
10793         unsigned long ptr;
10794         int type;
10795         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10796         u32 item_size = btrfs_item_size_nr(eb, slot);
10797         u64 flags;
10798         u64 offset;
10799         int metadata = 0;
10800         int level;
10801         struct btrfs_key key;
10802         int ret;
10803         int err = 0;
10804
10805         btrfs_item_key_to_cpu(eb, &key, slot);
10806         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10807                 bytes_used += key.offset;
10808         else
10809                 bytes_used += nodesize;
10810
10811         if (item_size < sizeof(*ei)) {
10812                 /*
10813                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10814                  * old thing when on disk format is still un-determined.
10815                  * No need to care about it anymore
10816                  */
10817                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10818                 return -ENOTTY;
10819         }
10820
10821         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10822         flags = btrfs_extent_flags(eb, ei);
10823
10824         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10825                 metadata = 1;
10826         if (metadata && check_crossing_stripes(global_info, key.objectid,
10827                                                eb->len)) {
10828                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10829                       key.objectid, key.objectid + nodesize);
10830                 err |= CROSSING_STRIPE_BOUNDARY;
10831         }
10832
10833         ptr = (unsigned long)(ei + 1);
10834
10835         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10836                 /* Old EXTENT_ITEM metadata */
10837                 struct btrfs_tree_block_info *info;
10838
10839                 info = (struct btrfs_tree_block_info *)ptr;
10840                 level = btrfs_tree_block_level(eb, info);
10841                 ptr += sizeof(struct btrfs_tree_block_info);
10842         } else {
10843                 /* New METADATA_ITEM */
10844                 level = key.offset;
10845         }
10846         end = (unsigned long)ei + item_size;
10847
10848 next:
10849         /* Reached extent item end normally */
10850         if (ptr == end)
10851                 goto out;
10852
10853         /* Beyond extent item end, wrong item size */
10854         if (ptr > end) {
10855                 err |= ITEM_SIZE_MISMATCH;
10856                 error("extent item at bytenr %llu slot %d has wrong size",
10857                         eb->start, slot);
10858                 goto out;
10859         }
10860
10861         /* Now check every backref in this extent item */
10862         iref = (struct btrfs_extent_inline_ref *)ptr;
10863         type = btrfs_extent_inline_ref_type(eb, iref);
10864         offset = btrfs_extent_inline_ref_offset(eb, iref);
10865         switch (type) {
10866         case BTRFS_TREE_BLOCK_REF_KEY:
10867                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10868                                                level);
10869                 err |= ret;
10870                 break;
10871         case BTRFS_SHARED_BLOCK_REF_KEY:
10872                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10873                                                  level);
10874                 err |= ret;
10875                 break;
10876         case BTRFS_EXTENT_DATA_REF_KEY:
10877                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10878                 ret = check_extent_data_backref(fs_info,
10879                                 btrfs_extent_data_ref_root(eb, dref),
10880                                 btrfs_extent_data_ref_objectid(eb, dref),
10881                                 btrfs_extent_data_ref_offset(eb, dref),
10882                                 key.objectid, key.offset,
10883                                 btrfs_extent_data_ref_count(eb, dref));
10884                 err |= ret;
10885                 break;
10886         case BTRFS_SHARED_DATA_REF_KEY:
10887                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10888                 err |= ret;
10889                 break;
10890         default:
10891                 error("extent[%llu %d %llu] has unknown ref type: %d",
10892                         key.objectid, key.type, key.offset, type);
10893                 err |= UNKNOWN_TYPE;
10894                 goto out;
10895         }
10896
10897         ptr += btrfs_extent_inline_ref_size(type);
10898         goto next;
10899
10900 out:
10901         return err;
10902 }
10903
10904 /*
10905  * Check if a dev extent item is referred correctly by its chunk
10906  */
10907 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10908                                  struct extent_buffer *eb, int slot)
10909 {
10910         struct btrfs_root *chunk_root = fs_info->chunk_root;
10911         struct btrfs_dev_extent *ptr;
10912         struct btrfs_path path;
10913         struct btrfs_key chunk_key;
10914         struct btrfs_key devext_key;
10915         struct btrfs_chunk *chunk;
10916         struct extent_buffer *l;
10917         int num_stripes;
10918         u64 length;
10919         int i;
10920         int found_chunk = 0;
10921         int ret;
10922
10923         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10924         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10925         length = btrfs_dev_extent_length(eb, ptr);
10926
10927         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10928         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10929         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10930
10931         btrfs_init_path(&path);
10932         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10933         if (ret)
10934                 goto out;
10935
10936         l = path.nodes[0];
10937         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10938         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10939                                       chunk_key.offset);
10940         if (ret < 0)
10941                 goto out;
10942
10943         if (btrfs_stripe_length(fs_info, l, chunk) != length)
10944                 goto out;
10945
10946         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10947         for (i = 0; i < num_stripes; i++) {
10948                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10949                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10950
10951                 if (devid == devext_key.objectid &&
10952                     offset == devext_key.offset) {
10953                         found_chunk = 1;
10954                         break;
10955                 }
10956         }
10957 out:
10958         btrfs_release_path(&path);
10959         if (!found_chunk) {
10960                 error(
10961                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10962                         devext_key.objectid, devext_key.offset, length);
10963                 return REFERENCER_MISSING;
10964         }
10965         return 0;
10966 }
10967
10968 /*
10969  * Check if the used space is correct with the dev item
10970  */
10971 static int check_dev_item(struct btrfs_fs_info *fs_info,
10972                           struct extent_buffer *eb, int slot)
10973 {
10974         struct btrfs_root *dev_root = fs_info->dev_root;
10975         struct btrfs_dev_item *dev_item;
10976         struct btrfs_path path;
10977         struct btrfs_key key;
10978         struct btrfs_dev_extent *ptr;
10979         u64 dev_id;
10980         u64 used;
10981         u64 total = 0;
10982         int ret;
10983
10984         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10985         dev_id = btrfs_device_id(eb, dev_item);
10986         used = btrfs_device_bytes_used(eb, dev_item);
10987
10988         key.objectid = dev_id;
10989         key.type = BTRFS_DEV_EXTENT_KEY;
10990         key.offset = 0;
10991
10992         btrfs_init_path(&path);
10993         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10994         if (ret < 0) {
10995                 btrfs_item_key_to_cpu(eb, &key, slot);
10996                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10997                         key.objectid, key.type, key.offset);
10998                 btrfs_release_path(&path);
10999                 return REFERENCER_MISSING;
11000         }
11001
11002         /* Iterate dev_extents to calculate the used space of a device */
11003         while (1) {
11004                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11005                         goto next;
11006
11007                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11008                 if (key.objectid > dev_id)
11009                         break;
11010                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11011                         goto next;
11012
11013                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11014                                      struct btrfs_dev_extent);
11015                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11016 next:
11017                 ret = btrfs_next_item(dev_root, &path);
11018                 if (ret)
11019                         break;
11020         }
11021         btrfs_release_path(&path);
11022
11023         if (used != total) {
11024                 btrfs_item_key_to_cpu(eb, &key, slot);
11025                 error(
11026 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11027                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11028                         BTRFS_DEV_EXTENT_KEY, dev_id);
11029                 return ACCOUNTING_MISMATCH;
11030         }
11031         return 0;
11032 }
11033
11034 /*
11035  * Check a block group item with its referener (chunk) and its used space
11036  * with extent/metadata item
11037  */
11038 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11039                                   struct extent_buffer *eb, int slot)
11040 {
11041         struct btrfs_root *extent_root = fs_info->extent_root;
11042         struct btrfs_root *chunk_root = fs_info->chunk_root;
11043         struct btrfs_block_group_item *bi;
11044         struct btrfs_block_group_item bg_item;
11045         struct btrfs_path path;
11046         struct btrfs_key bg_key;
11047         struct btrfs_key chunk_key;
11048         struct btrfs_key extent_key;
11049         struct btrfs_chunk *chunk;
11050         struct extent_buffer *leaf;
11051         struct btrfs_extent_item *ei;
11052         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11053         u64 flags;
11054         u64 bg_flags;
11055         u64 used;
11056         u64 total = 0;
11057         int ret;
11058         int err = 0;
11059
11060         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11061         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11062         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11063         used = btrfs_block_group_used(&bg_item);
11064         bg_flags = btrfs_block_group_flags(&bg_item);
11065
11066         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11067         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11068         chunk_key.offset = bg_key.objectid;
11069
11070         btrfs_init_path(&path);
11071         /* Search for the referencer chunk */
11072         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11073         if (ret) {
11074                 error(
11075                 "block group[%llu %llu] did not find the related chunk item",
11076                         bg_key.objectid, bg_key.offset);
11077                 err |= REFERENCER_MISSING;
11078         } else {
11079                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11080                                         struct btrfs_chunk);
11081                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11082                                                 bg_key.offset) {
11083                         error(
11084         "block group[%llu %llu] related chunk item length does not match",
11085                                 bg_key.objectid, bg_key.offset);
11086                         err |= REFERENCER_MISMATCH;
11087                 }
11088         }
11089         btrfs_release_path(&path);
11090
11091         /* Search from the block group bytenr */
11092         extent_key.objectid = bg_key.objectid;
11093         extent_key.type = 0;
11094         extent_key.offset = 0;
11095
11096         btrfs_init_path(&path);
11097         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11098         if (ret < 0)
11099                 goto out;
11100
11101         /* Iterate extent tree to account used space */
11102         while (1) {
11103                 leaf = path.nodes[0];
11104
11105                 /* Search slot can point to the last item beyond leaf nritems */
11106                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11107                         goto next;
11108
11109                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11110                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11111                         break;
11112
11113                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11114                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11115                         goto next;
11116                 if (extent_key.objectid < bg_key.objectid)
11117                         goto next;
11118
11119                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11120                         total += nodesize;
11121                 else
11122                         total += extent_key.offset;
11123
11124                 ei = btrfs_item_ptr(leaf, path.slots[0],
11125                                     struct btrfs_extent_item);
11126                 flags = btrfs_extent_flags(leaf, ei);
11127                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11128                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11129                                 error(
11130                         "bad extent[%llu, %llu) type mismatch with chunk",
11131                                         extent_key.objectid,
11132                                         extent_key.objectid + extent_key.offset);
11133                                 err |= CHUNK_TYPE_MISMATCH;
11134                         }
11135                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11136                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11137                                     BTRFS_BLOCK_GROUP_METADATA))) {
11138                                 error(
11139                         "bad extent[%llu, %llu) type mismatch with chunk",
11140                                         extent_key.objectid,
11141                                         extent_key.objectid + nodesize);
11142                                 err |= CHUNK_TYPE_MISMATCH;
11143                         }
11144                 }
11145 next:
11146                 ret = btrfs_next_item(extent_root, &path);
11147                 if (ret)
11148                         break;
11149         }
11150
11151 out:
11152         btrfs_release_path(&path);
11153
11154         if (total != used) {
11155                 error(
11156                 "block group[%llu %llu] used %llu but extent items used %llu",
11157                         bg_key.objectid, bg_key.offset, used, total);
11158                 err |= ACCOUNTING_MISMATCH;
11159         }
11160         return err;
11161 }
11162
11163 /*
11164  * Check a chunk item.
11165  * Including checking all referred dev_extents and block group
11166  */
11167 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11168                             struct extent_buffer *eb, int slot)
11169 {
11170         struct btrfs_root *extent_root = fs_info->extent_root;
11171         struct btrfs_root *dev_root = fs_info->dev_root;
11172         struct btrfs_path path;
11173         struct btrfs_key chunk_key;
11174         struct btrfs_key bg_key;
11175         struct btrfs_key devext_key;
11176         struct btrfs_chunk *chunk;
11177         struct extent_buffer *leaf;
11178         struct btrfs_block_group_item *bi;
11179         struct btrfs_block_group_item bg_item;
11180         struct btrfs_dev_extent *ptr;
11181         u64 length;
11182         u64 chunk_end;
11183         u64 stripe_len;
11184         u64 type;
11185         int num_stripes;
11186         u64 offset;
11187         u64 objectid;
11188         int i;
11189         int ret;
11190         int err = 0;
11191
11192         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11193         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11194         length = btrfs_chunk_length(eb, chunk);
11195         chunk_end = chunk_key.offset + length;
11196         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11197                                       chunk_key.offset);
11198         if (ret < 0) {
11199                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11200                         chunk_end);
11201                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11202                 goto out;
11203         }
11204         type = btrfs_chunk_type(eb, chunk);
11205
11206         bg_key.objectid = chunk_key.offset;
11207         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11208         bg_key.offset = length;
11209
11210         btrfs_init_path(&path);
11211         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11212         if (ret) {
11213                 error(
11214                 "chunk[%llu %llu) did not find the related block group item",
11215                         chunk_key.offset, chunk_end);
11216                 err |= REFERENCER_MISSING;
11217         } else{
11218                 leaf = path.nodes[0];
11219                 bi = btrfs_item_ptr(leaf, path.slots[0],
11220                                     struct btrfs_block_group_item);
11221                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11222                                    sizeof(bg_item));
11223                 if (btrfs_block_group_flags(&bg_item) != type) {
11224                         error(
11225 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11226                                 chunk_key.offset, chunk_end, type,
11227                                 btrfs_block_group_flags(&bg_item));
11228                         err |= REFERENCER_MISSING;
11229                 }
11230         }
11231
11232         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11233         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11234         for (i = 0; i < num_stripes; i++) {
11235                 btrfs_release_path(&path);
11236                 btrfs_init_path(&path);
11237                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11238                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11239                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11240
11241                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11242                                         0, 0);
11243                 if (ret)
11244                         goto not_match_dev;
11245
11246                 leaf = path.nodes[0];
11247                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11248                                      struct btrfs_dev_extent);
11249                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11250                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11251                 if (objectid != chunk_key.objectid ||
11252                     offset != chunk_key.offset ||
11253                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11254                         goto not_match_dev;
11255                 continue;
11256 not_match_dev:
11257                 err |= BACKREF_MISSING;
11258                 error(
11259                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11260                         chunk_key.objectid, chunk_end, i);
11261                 continue;
11262         }
11263         btrfs_release_path(&path);
11264 out:
11265         return err;
11266 }
11267
11268 /*
11269  * Main entry function to check known items and update related accounting info
11270  */
11271 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11272 {
11273         struct btrfs_fs_info *fs_info = root->fs_info;
11274         struct btrfs_key key;
11275         int slot = 0;
11276         int type;
11277         struct btrfs_extent_data_ref *dref;
11278         int ret;
11279         int err = 0;
11280
11281 next:
11282         btrfs_item_key_to_cpu(eb, &key, slot);
11283         type = key.type;
11284
11285         switch (type) {
11286         case BTRFS_EXTENT_DATA_KEY:
11287                 ret = check_extent_data_item(root, eb, slot);
11288                 err |= ret;
11289                 break;
11290         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11291                 ret = check_block_group_item(fs_info, eb, slot);
11292                 err |= ret;
11293                 break;
11294         case BTRFS_DEV_ITEM_KEY:
11295                 ret = check_dev_item(fs_info, eb, slot);
11296                 err |= ret;
11297                 break;
11298         case BTRFS_CHUNK_ITEM_KEY:
11299                 ret = check_chunk_item(fs_info, eb, slot);
11300                 err |= ret;
11301                 break;
11302         case BTRFS_DEV_EXTENT_KEY:
11303                 ret = check_dev_extent_item(fs_info, eb, slot);
11304                 err |= ret;
11305                 break;
11306         case BTRFS_EXTENT_ITEM_KEY:
11307         case BTRFS_METADATA_ITEM_KEY:
11308                 ret = check_extent_item(fs_info, eb, slot);
11309                 err |= ret;
11310                 break;
11311         case BTRFS_EXTENT_CSUM_KEY:
11312                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11313                 break;
11314         case BTRFS_TREE_BLOCK_REF_KEY:
11315                 ret = check_tree_block_backref(fs_info, key.offset,
11316                                                key.objectid, -1);
11317                 err |= ret;
11318                 break;
11319         case BTRFS_EXTENT_DATA_REF_KEY:
11320                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11321                 ret = check_extent_data_backref(fs_info,
11322                                 btrfs_extent_data_ref_root(eb, dref),
11323                                 btrfs_extent_data_ref_objectid(eb, dref),
11324                                 btrfs_extent_data_ref_offset(eb, dref),
11325                                 key.objectid, 0,
11326                                 btrfs_extent_data_ref_count(eb, dref));
11327                 err |= ret;
11328                 break;
11329         case BTRFS_SHARED_BLOCK_REF_KEY:
11330                 ret = check_shared_block_backref(fs_info, key.offset,
11331                                                  key.objectid, -1);
11332                 err |= ret;
11333                 break;
11334         case BTRFS_SHARED_DATA_REF_KEY:
11335                 ret = check_shared_data_backref(fs_info, key.offset,
11336                                                 key.objectid);
11337                 err |= ret;
11338                 break;
11339         default:
11340                 break;
11341         }
11342
11343         if (++slot < btrfs_header_nritems(eb))
11344                 goto next;
11345
11346         return err;
11347 }
11348
11349 /*
11350  * Helper function for later fs/subvol tree check.  To determine if a tree
11351  * block should be checked.
11352  * This function will ensure only the direct referencer with lowest rootid to
11353  * check a fs/subvolume tree block.
11354  *
11355  * Backref check at extent tree would detect errors like missing subvolume
11356  * tree, so we can do aggressive check to reduce duplicated checks.
11357  */
11358 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11359 {
11360         struct btrfs_root *extent_root = root->fs_info->extent_root;
11361         struct btrfs_key key;
11362         struct btrfs_path path;
11363         struct extent_buffer *leaf;
11364         int slot;
11365         struct btrfs_extent_item *ei;
11366         unsigned long ptr;
11367         unsigned long end;
11368         int type;
11369         u32 item_size;
11370         u64 offset;
11371         struct btrfs_extent_inline_ref *iref;
11372         int ret;
11373
11374         btrfs_init_path(&path);
11375         key.objectid = btrfs_header_bytenr(eb);
11376         key.type = BTRFS_METADATA_ITEM_KEY;
11377         key.offset = (u64)-1;
11378
11379         /*
11380          * Any failure in backref resolving means we can't determine
11381          * whom the tree block belongs to.
11382          * So in that case, we need to check that tree block
11383          */
11384         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11385         if (ret < 0)
11386                 goto need_check;
11387
11388         ret = btrfs_previous_extent_item(extent_root, &path,
11389                                          btrfs_header_bytenr(eb));
11390         if (ret)
11391                 goto need_check;
11392
11393         leaf = path.nodes[0];
11394         slot = path.slots[0];
11395         btrfs_item_key_to_cpu(leaf, &key, slot);
11396         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11397
11398         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11399                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11400         } else {
11401                 struct btrfs_tree_block_info *info;
11402
11403                 info = (struct btrfs_tree_block_info *)(ei + 1);
11404                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11405         }
11406
11407         item_size = btrfs_item_size_nr(leaf, slot);
11408         ptr = (unsigned long)iref;
11409         end = (unsigned long)ei + item_size;
11410         while (ptr < end) {
11411                 iref = (struct btrfs_extent_inline_ref *)ptr;
11412                 type = btrfs_extent_inline_ref_type(leaf, iref);
11413                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11414
11415                 /*
11416                  * We only check the tree block if current root is
11417                  * the lowest referencer of it.
11418                  */
11419                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11420                     offset < root->objectid) {
11421                         btrfs_release_path(&path);
11422                         return 0;
11423                 }
11424
11425                 ptr += btrfs_extent_inline_ref_size(type);
11426         }
11427         /*
11428          * Normally we should also check keyed tree block ref, but that may be
11429          * very time consuming.  Inlined ref should already make us skip a lot
11430          * of refs now.  So skip search keyed tree block ref.
11431          */
11432
11433 need_check:
11434         btrfs_release_path(&path);
11435         return 1;
11436 }
11437
11438 /*
11439  * Traversal function for tree block. We will do:
11440  * 1) Skip shared fs/subvolume tree blocks
11441  * 2) Update related bytes accounting
11442  * 3) Pre-order traversal
11443  */
11444 static int traverse_tree_block(struct btrfs_root *root,
11445                                 struct extent_buffer *node)
11446 {
11447         struct extent_buffer *eb;
11448         struct btrfs_key key;
11449         struct btrfs_key drop_key;
11450         int level;
11451         u64 nr;
11452         int i;
11453         int err = 0;
11454         int ret;
11455
11456         /*
11457          * Skip shared fs/subvolume tree block, in that case they will
11458          * be checked by referencer with lowest rootid
11459          */
11460         if (is_fstree(root->objectid) && !should_check(root, node))
11461                 return 0;
11462
11463         /* Update bytes accounting */
11464         total_btree_bytes += node->len;
11465         if (fs_root_objectid(btrfs_header_owner(node)))
11466                 total_fs_tree_bytes += node->len;
11467         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11468                 total_extent_tree_bytes += node->len;
11469
11470         /* pre-order tranversal, check itself first */
11471         level = btrfs_header_level(node);
11472         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11473                                    btrfs_header_level(node),
11474                                    btrfs_header_owner(node));
11475         err |= ret;
11476         if (err)
11477                 error(
11478         "check %s failed root %llu bytenr %llu level %d, force continue check",
11479                         level ? "node":"leaf", root->objectid,
11480                         btrfs_header_bytenr(node), btrfs_header_level(node));
11481
11482         if (!level) {
11483                 btree_space_waste += btrfs_leaf_free_space(root, node);
11484                 ret = check_leaf_items(root, node);
11485                 err |= ret;
11486                 return err;
11487         }
11488
11489         nr = btrfs_header_nritems(node);
11490         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11491         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11492                 sizeof(struct btrfs_key_ptr);
11493
11494         /* Then check all its children */
11495         for (i = 0; i < nr; i++) {
11496                 u64 blocknr = btrfs_node_blockptr(node, i);
11497
11498                 btrfs_node_key_to_cpu(node, &key, i);
11499                 if (level == root->root_item.drop_level &&
11500                     is_dropped_key(&key, &drop_key))
11501                         continue;
11502
11503                 /*
11504                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11505                  * to call the function itself.
11506                  */
11507                 eb = read_tree_block(root->fs_info, blocknr, 0);
11508                 if (extent_buffer_uptodate(eb)) {
11509                         ret = traverse_tree_block(root, eb);
11510                         err |= ret;
11511                 }
11512                 free_extent_buffer(eb);
11513         }
11514
11515         return err;
11516 }
11517
11518 /*
11519  * Low memory usage version check_chunks_and_extents.
11520  */
11521 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11522 {
11523         struct btrfs_path path;
11524         struct btrfs_key key;
11525         struct btrfs_root *root1;
11526         struct btrfs_root *root;
11527         struct btrfs_root *cur_root;
11528         int err = 0;
11529         int ret;
11530
11531         root = fs_info->fs_root;
11532
11533         root1 = root->fs_info->chunk_root;
11534         ret = traverse_tree_block(root1, root1->node);
11535         err |= ret;
11536
11537         root1 = root->fs_info->tree_root;
11538         ret = traverse_tree_block(root1, root1->node);
11539         err |= ret;
11540
11541         btrfs_init_path(&path);
11542         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11543         key.offset = 0;
11544         key.type = BTRFS_ROOT_ITEM_KEY;
11545
11546         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11547         if (ret) {
11548                 error("cannot find extent treet in tree_root");
11549                 goto out;
11550         }
11551
11552         while (1) {
11553                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11554                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11555                         goto next;
11556                 key.offset = (u64)-1;
11557
11558                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11559                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11560                                         &key);
11561                 else
11562                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11563                 if (IS_ERR(cur_root) || !cur_root) {
11564                         error("failed to read tree: %lld", key.objectid);
11565                         goto next;
11566                 }
11567
11568                 ret = traverse_tree_block(cur_root, cur_root->node);
11569                 err |= ret;
11570
11571                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11572                         btrfs_free_fs_root(cur_root);
11573 next:
11574                 ret = btrfs_next_item(root1, &path);
11575                 if (ret)
11576                         goto out;
11577         }
11578
11579 out:
11580         btrfs_release_path(&path);
11581         return err;
11582 }
11583
11584 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11585 {
11586         int ret;
11587
11588         if (!ctx.progress_enabled)
11589                 fprintf(stderr, "checking extents\n");
11590         if (check_mode == CHECK_MODE_LOWMEM)
11591                 ret = check_chunks_and_extents_v2(fs_info);
11592         else
11593                 ret = check_chunks_and_extents(fs_info);
11594
11595         return ret;
11596 }
11597
11598 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11599                            struct btrfs_root *root, int overwrite)
11600 {
11601         struct extent_buffer *c;
11602         struct extent_buffer *old = root->node;
11603         int level;
11604         int ret;
11605         struct btrfs_disk_key disk_key = {0,0,0};
11606
11607         level = 0;
11608
11609         if (overwrite) {
11610                 c = old;
11611                 extent_buffer_get(c);
11612                 goto init;
11613         }
11614         c = btrfs_alloc_free_block(trans, root,
11615                                    root->fs_info->nodesize,
11616                                    root->root_key.objectid,
11617                                    &disk_key, level, 0, 0);
11618         if (IS_ERR(c)) {
11619                 c = old;
11620                 extent_buffer_get(c);
11621                 overwrite = 1;
11622         }
11623 init:
11624         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11625         btrfs_set_header_level(c, level);
11626         btrfs_set_header_bytenr(c, c->start);
11627         btrfs_set_header_generation(c, trans->transid);
11628         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11629         btrfs_set_header_owner(c, root->root_key.objectid);
11630
11631         write_extent_buffer(c, root->fs_info->fsid,
11632                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11633
11634         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11635                             btrfs_header_chunk_tree_uuid(c),
11636                             BTRFS_UUID_SIZE);
11637
11638         btrfs_mark_buffer_dirty(c);
11639         /*
11640          * this case can happen in the following case:
11641          *
11642          * 1.overwrite previous root.
11643          *
11644          * 2.reinit reloc data root, this is because we skip pin
11645          * down reloc data tree before which means we can allocate
11646          * same block bytenr here.
11647          */
11648         if (old->start == c->start) {
11649                 btrfs_set_root_generation(&root->root_item,
11650                                           trans->transid);
11651                 root->root_item.level = btrfs_header_level(root->node);
11652                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11653                                         &root->root_key, &root->root_item);
11654                 if (ret) {
11655                         free_extent_buffer(c);
11656                         return ret;
11657                 }
11658         }
11659         free_extent_buffer(old);
11660         root->node = c;
11661         add_root_to_dirty_list(root);
11662         return 0;
11663 }
11664
11665 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11666                                 struct extent_buffer *eb, int tree_root)
11667 {
11668         struct extent_buffer *tmp;
11669         struct btrfs_root_item *ri;
11670         struct btrfs_key key;
11671         u64 bytenr;
11672         int level = btrfs_header_level(eb);
11673         int nritems;
11674         int ret;
11675         int i;
11676
11677         /*
11678          * If we have pinned this block before, don't pin it again.
11679          * This can not only avoid forever loop with broken filesystem
11680          * but also give us some speedups.
11681          */
11682         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11683                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11684                 return 0;
11685
11686         btrfs_pin_extent(fs_info, eb->start, eb->len);
11687
11688         nritems = btrfs_header_nritems(eb);
11689         for (i = 0; i < nritems; i++) {
11690                 if (level == 0) {
11691                         btrfs_item_key_to_cpu(eb, &key, i);
11692                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11693                                 continue;
11694                         /* Skip the extent root and reloc roots */
11695                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11696                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11697                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11698                                 continue;
11699                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11700                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11701
11702                         /*
11703                          * If at any point we start needing the real root we
11704                          * will have to build a stump root for the root we are
11705                          * in, but for now this doesn't actually use the root so
11706                          * just pass in extent_root.
11707                          */
11708                         tmp = read_tree_block(fs_info, bytenr, 0);
11709                         if (!extent_buffer_uptodate(tmp)) {
11710                                 fprintf(stderr, "Error reading root block\n");
11711                                 return -EIO;
11712                         }
11713                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11714                         free_extent_buffer(tmp);
11715                         if (ret)
11716                                 return ret;
11717                 } else {
11718                         bytenr = btrfs_node_blockptr(eb, i);
11719
11720                         /* If we aren't the tree root don't read the block */
11721                         if (level == 1 && !tree_root) {
11722                                 btrfs_pin_extent(fs_info, bytenr,
11723                                                 fs_info->nodesize);
11724                                 continue;
11725                         }
11726
11727                         tmp = read_tree_block(fs_info, bytenr, 0);
11728                         if (!extent_buffer_uptodate(tmp)) {
11729                                 fprintf(stderr, "Error reading tree block\n");
11730                                 return -EIO;
11731                         }
11732                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11733                         free_extent_buffer(tmp);
11734                         if (ret)
11735                                 return ret;
11736                 }
11737         }
11738
11739         return 0;
11740 }
11741
11742 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11743 {
11744         int ret;
11745
11746         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11747         if (ret)
11748                 return ret;
11749
11750         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11751 }
11752
11753 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11754 {
11755         struct btrfs_block_group_cache *cache;
11756         struct btrfs_path path;
11757         struct extent_buffer *leaf;
11758         struct btrfs_chunk *chunk;
11759         struct btrfs_key key;
11760         int ret;
11761         u64 start;
11762
11763         btrfs_init_path(&path);
11764         key.objectid = 0;
11765         key.type = BTRFS_CHUNK_ITEM_KEY;
11766         key.offset = 0;
11767         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11768         if (ret < 0) {
11769                 btrfs_release_path(&path);
11770                 return ret;
11771         }
11772
11773         /*
11774          * We do this in case the block groups were screwed up and had alloc
11775          * bits that aren't actually set on the chunks.  This happens with
11776          * restored images every time and could happen in real life I guess.
11777          */
11778         fs_info->avail_data_alloc_bits = 0;
11779         fs_info->avail_metadata_alloc_bits = 0;
11780         fs_info->avail_system_alloc_bits = 0;
11781
11782         /* First we need to create the in-memory block groups */
11783         while (1) {
11784                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11785                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11786                         if (ret < 0) {
11787                                 btrfs_release_path(&path);
11788                                 return ret;
11789                         }
11790                         if (ret) {
11791                                 ret = 0;
11792                                 break;
11793                         }
11794                 }
11795                 leaf = path.nodes[0];
11796                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11797                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11798                         path.slots[0]++;
11799                         continue;
11800                 }
11801
11802                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11803                 btrfs_add_block_group(fs_info, 0,
11804                                       btrfs_chunk_type(leaf, chunk),
11805                                       key.objectid, key.offset,
11806                                       btrfs_chunk_length(leaf, chunk));
11807                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11808                                  key.offset + btrfs_chunk_length(leaf, chunk));
11809                 path.slots[0]++;
11810         }
11811         start = 0;
11812         while (1) {
11813                 cache = btrfs_lookup_first_block_group(fs_info, start);
11814                 if (!cache)
11815                         break;
11816                 cache->cached = 1;
11817                 start = cache->key.objectid + cache->key.offset;
11818         }
11819
11820         btrfs_release_path(&path);
11821         return 0;
11822 }
11823
11824 static int reset_balance(struct btrfs_trans_handle *trans,
11825                          struct btrfs_fs_info *fs_info)
11826 {
11827         struct btrfs_root *root = fs_info->tree_root;
11828         struct btrfs_path path;
11829         struct extent_buffer *leaf;
11830         struct btrfs_key key;
11831         int del_slot, del_nr = 0;
11832         int ret;
11833         int found = 0;
11834
11835         btrfs_init_path(&path);
11836         key.objectid = BTRFS_BALANCE_OBJECTID;
11837         key.type = BTRFS_BALANCE_ITEM_KEY;
11838         key.offset = 0;
11839         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11840         if (ret) {
11841                 if (ret > 0)
11842                         ret = 0;
11843                 if (!ret)
11844                         goto reinit_data_reloc;
11845                 else
11846                         goto out;
11847         }
11848
11849         ret = btrfs_del_item(trans, root, &path);
11850         if (ret)
11851                 goto out;
11852         btrfs_release_path(&path);
11853
11854         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11855         key.type = BTRFS_ROOT_ITEM_KEY;
11856         key.offset = 0;
11857         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11858         if (ret < 0)
11859                 goto out;
11860         while (1) {
11861                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11862                         if (!found)
11863                                 break;
11864
11865                         if (del_nr) {
11866                                 ret = btrfs_del_items(trans, root, &path,
11867                                                       del_slot, del_nr);
11868                                 del_nr = 0;
11869                                 if (ret)
11870                                         goto out;
11871                         }
11872                         key.offset++;
11873                         btrfs_release_path(&path);
11874
11875                         found = 0;
11876                         ret = btrfs_search_slot(trans, root, &key, &path,
11877                                                 -1, 1);
11878                         if (ret < 0)
11879                                 goto out;
11880                         continue;
11881                 }
11882                 found = 1;
11883                 leaf = path.nodes[0];
11884                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11885                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11886                         break;
11887                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11888                         path.slots[0]++;
11889                         continue;
11890                 }
11891                 if (!del_nr) {
11892                         del_slot = path.slots[0];
11893                         del_nr = 1;
11894                 } else {
11895                         del_nr++;
11896                 }
11897                 path.slots[0]++;
11898         }
11899
11900         if (del_nr) {
11901                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11902                 if (ret)
11903                         goto out;
11904         }
11905         btrfs_release_path(&path);
11906
11907 reinit_data_reloc:
11908         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11909         key.type = BTRFS_ROOT_ITEM_KEY;
11910         key.offset = (u64)-1;
11911         root = btrfs_read_fs_root(fs_info, &key);
11912         if (IS_ERR(root)) {
11913                 fprintf(stderr, "Error reading data reloc tree\n");
11914                 ret = PTR_ERR(root);
11915                 goto out;
11916         }
11917         record_root_in_trans(trans, root);
11918         ret = btrfs_fsck_reinit_root(trans, root, 0);
11919         if (ret)
11920                 goto out;
11921         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11922 out:
11923         btrfs_release_path(&path);
11924         return ret;
11925 }
11926
11927 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11928                               struct btrfs_fs_info *fs_info)
11929 {
11930         u64 start = 0;
11931         int ret;
11932
11933         /*
11934          * The only reason we don't do this is because right now we're just
11935          * walking the trees we find and pinning down their bytes, we don't look
11936          * at any of the leaves.  In order to do mixed groups we'd have to check
11937          * the leaves of any fs roots and pin down the bytes for any file
11938          * extents we find.  Not hard but why do it if we don't have to?
11939          */
11940         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11941                 fprintf(stderr, "We don't support re-initing the extent tree "
11942                         "for mixed block groups yet, please notify a btrfs "
11943                         "developer you want to do this so they can add this "
11944                         "functionality.\n");
11945                 return -EINVAL;
11946         }
11947
11948         /*
11949          * first we need to walk all of the trees except the extent tree and pin
11950          * down the bytes that are in use so we don't overwrite any existing
11951          * metadata.
11952          */
11953         ret = pin_metadata_blocks(fs_info);
11954         if (ret) {
11955                 fprintf(stderr, "error pinning down used bytes\n");
11956                 return ret;
11957         }
11958
11959         /*
11960          * Need to drop all the block groups since we're going to recreate all
11961          * of them again.
11962          */
11963         btrfs_free_block_groups(fs_info);
11964         ret = reset_block_groups(fs_info);
11965         if (ret) {
11966                 fprintf(stderr, "error resetting the block groups\n");
11967                 return ret;
11968         }
11969
11970         /* Ok we can allocate now, reinit the extent root */
11971         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11972         if (ret) {
11973                 fprintf(stderr, "extent root initialization failed\n");
11974                 /*
11975                  * When the transaction code is updated we should end the
11976                  * transaction, but for now progs only knows about commit so
11977                  * just return an error.
11978                  */
11979                 return ret;
11980         }
11981
11982         /*
11983          * Now we have all the in-memory block groups setup so we can make
11984          * allocations properly, and the metadata we care about is safe since we
11985          * pinned all of it above.
11986          */
11987         while (1) {
11988                 struct btrfs_block_group_cache *cache;
11989
11990                 cache = btrfs_lookup_first_block_group(fs_info, start);
11991                 if (!cache)
11992                         break;
11993                 start = cache->key.objectid + cache->key.offset;
11994                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11995                                         &cache->key, &cache->item,
11996                                         sizeof(cache->item));
11997                 if (ret) {
11998                         fprintf(stderr, "Error adding block group\n");
11999                         return ret;
12000                 }
12001                 btrfs_extent_post_op(trans, fs_info->extent_root);
12002         }
12003
12004         ret = reset_balance(trans, fs_info);
12005         if (ret)
12006                 fprintf(stderr, "error resetting the pending balance\n");
12007
12008         return ret;
12009 }
12010
12011 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12012 {
12013         struct btrfs_path path;
12014         struct btrfs_trans_handle *trans;
12015         struct btrfs_key key;
12016         int ret;
12017
12018         printf("Recowing metadata block %llu\n", eb->start);
12019         key.objectid = btrfs_header_owner(eb);
12020         key.type = BTRFS_ROOT_ITEM_KEY;
12021         key.offset = (u64)-1;
12022
12023         root = btrfs_read_fs_root(root->fs_info, &key);
12024         if (IS_ERR(root)) {
12025                 fprintf(stderr, "Couldn't find owner root %llu\n",
12026                         key.objectid);
12027                 return PTR_ERR(root);
12028         }
12029
12030         trans = btrfs_start_transaction(root, 1);
12031         if (IS_ERR(trans))
12032                 return PTR_ERR(trans);
12033
12034         btrfs_init_path(&path);
12035         path.lowest_level = btrfs_header_level(eb);
12036         if (path.lowest_level)
12037                 btrfs_node_key_to_cpu(eb, &key, 0);
12038         else
12039                 btrfs_item_key_to_cpu(eb, &key, 0);
12040
12041         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12042         btrfs_commit_transaction(trans, root);
12043         btrfs_release_path(&path);
12044         return ret;
12045 }
12046
12047 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12048 {
12049         struct btrfs_path path;
12050         struct btrfs_trans_handle *trans;
12051         struct btrfs_key key;
12052         int ret;
12053
12054         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12055                bad->key.type, bad->key.offset);
12056         key.objectid = bad->root_id;
12057         key.type = BTRFS_ROOT_ITEM_KEY;
12058         key.offset = (u64)-1;
12059
12060         root = btrfs_read_fs_root(root->fs_info, &key);
12061         if (IS_ERR(root)) {
12062                 fprintf(stderr, "Couldn't find owner root %llu\n",
12063                         key.objectid);
12064                 return PTR_ERR(root);
12065         }
12066
12067         trans = btrfs_start_transaction(root, 1);
12068         if (IS_ERR(trans))
12069                 return PTR_ERR(trans);
12070
12071         btrfs_init_path(&path);
12072         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12073         if (ret) {
12074                 if (ret > 0)
12075                         ret = 0;
12076                 goto out;
12077         }
12078         ret = btrfs_del_item(trans, root, &path);
12079 out:
12080         btrfs_commit_transaction(trans, root);
12081         btrfs_release_path(&path);
12082         return ret;
12083 }
12084
12085 static int zero_log_tree(struct btrfs_root *root)
12086 {
12087         struct btrfs_trans_handle *trans;
12088         int ret;
12089
12090         trans = btrfs_start_transaction(root, 1);
12091         if (IS_ERR(trans)) {
12092                 ret = PTR_ERR(trans);
12093                 return ret;
12094         }
12095         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12096         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12097         ret = btrfs_commit_transaction(trans, root);
12098         return ret;
12099 }
12100
12101 static int populate_csum(struct btrfs_trans_handle *trans,
12102                          struct btrfs_root *csum_root, char *buf, u64 start,
12103                          u64 len)
12104 {
12105         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12106         u64 offset = 0;
12107         u64 sectorsize;
12108         int ret = 0;
12109
12110         while (offset < len) {
12111                 sectorsize = fs_info->sectorsize;
12112                 ret = read_extent_data(fs_info, buf, start + offset,
12113                                        &sectorsize, 0);
12114                 if (ret)
12115                         break;
12116                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12117                                             start + offset, buf, sectorsize);
12118                 if (ret)
12119                         break;
12120                 offset += sectorsize;
12121         }
12122         return ret;
12123 }
12124
12125 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12126                                       struct btrfs_root *csum_root,
12127                                       struct btrfs_root *cur_root)
12128 {
12129         struct btrfs_path path;
12130         struct btrfs_key key;
12131         struct extent_buffer *node;
12132         struct btrfs_file_extent_item *fi;
12133         char *buf = NULL;
12134         u64 start = 0;
12135         u64 len = 0;
12136         int slot = 0;
12137         int ret = 0;
12138
12139         buf = malloc(cur_root->fs_info->sectorsize);
12140         if (!buf)
12141                 return -ENOMEM;
12142
12143         btrfs_init_path(&path);
12144         key.objectid = 0;
12145         key.offset = 0;
12146         key.type = 0;
12147         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12148         if (ret < 0)
12149                 goto out;
12150         /* Iterate all regular file extents and fill its csum */
12151         while (1) {
12152                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12153
12154                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12155                         goto next;
12156                 node = path.nodes[0];
12157                 slot = path.slots[0];
12158                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12159                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12160                         goto next;
12161                 start = btrfs_file_extent_disk_bytenr(node, fi);
12162                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12163
12164                 ret = populate_csum(trans, csum_root, buf, start, len);
12165                 if (ret == -EEXIST)
12166                         ret = 0;
12167                 if (ret < 0)
12168                         goto out;
12169 next:
12170                 /*
12171                  * TODO: if next leaf is corrupted, jump to nearest next valid
12172                  * leaf.
12173                  */
12174                 ret = btrfs_next_item(cur_root, &path);
12175                 if (ret < 0)
12176                         goto out;
12177                 if (ret > 0) {
12178                         ret = 0;
12179                         goto out;
12180                 }
12181         }
12182
12183 out:
12184         btrfs_release_path(&path);
12185         free(buf);
12186         return ret;
12187 }
12188
12189 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12190                                   struct btrfs_root *csum_root)
12191 {
12192         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12193         struct btrfs_path path;
12194         struct btrfs_root *tree_root = fs_info->tree_root;
12195         struct btrfs_root *cur_root;
12196         struct extent_buffer *node;
12197         struct btrfs_key key;
12198         int slot = 0;
12199         int ret = 0;
12200
12201         btrfs_init_path(&path);
12202         key.objectid = BTRFS_FS_TREE_OBJECTID;
12203         key.offset = 0;
12204         key.type = BTRFS_ROOT_ITEM_KEY;
12205         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12206         if (ret < 0)
12207                 goto out;
12208         if (ret > 0) {
12209                 ret = -ENOENT;
12210                 goto out;
12211         }
12212
12213         while (1) {
12214                 node = path.nodes[0];
12215                 slot = path.slots[0];
12216                 btrfs_item_key_to_cpu(node, &key, slot);
12217                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12218                         goto out;
12219                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12220                         goto next;
12221                 if (!is_fstree(key.objectid))
12222                         goto next;
12223                 key.offset = (u64)-1;
12224
12225                 cur_root = btrfs_read_fs_root(fs_info, &key);
12226                 if (IS_ERR(cur_root) || !cur_root) {
12227                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12228                                 key.objectid);
12229                         goto out;
12230                 }
12231                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12232                                 cur_root);
12233                 if (ret < 0)
12234                         goto out;
12235 next:
12236                 ret = btrfs_next_item(tree_root, &path);
12237                 if (ret > 0) {
12238                         ret = 0;
12239                         goto out;
12240                 }
12241                 if (ret < 0)
12242                         goto out;
12243         }
12244
12245 out:
12246         btrfs_release_path(&path);
12247         return ret;
12248 }
12249
12250 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12251                                       struct btrfs_root *csum_root)
12252 {
12253         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12254         struct btrfs_path path;
12255         struct btrfs_extent_item *ei;
12256         struct extent_buffer *leaf;
12257         char *buf;
12258         struct btrfs_key key;
12259         int ret;
12260
12261         btrfs_init_path(&path);
12262         key.objectid = 0;
12263         key.type = BTRFS_EXTENT_ITEM_KEY;
12264         key.offset = 0;
12265         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12266         if (ret < 0) {
12267                 btrfs_release_path(&path);
12268                 return ret;
12269         }
12270
12271         buf = malloc(csum_root->fs_info->sectorsize);
12272         if (!buf) {
12273                 btrfs_release_path(&path);
12274                 return -ENOMEM;
12275         }
12276
12277         while (1) {
12278                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12279                         ret = btrfs_next_leaf(extent_root, &path);
12280                         if (ret < 0)
12281                                 break;
12282                         if (ret) {
12283                                 ret = 0;
12284                                 break;
12285                         }
12286                 }
12287                 leaf = path.nodes[0];
12288
12289                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12290                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12291                         path.slots[0]++;
12292                         continue;
12293                 }
12294
12295                 ei = btrfs_item_ptr(leaf, path.slots[0],
12296                                     struct btrfs_extent_item);
12297                 if (!(btrfs_extent_flags(leaf, ei) &
12298                       BTRFS_EXTENT_FLAG_DATA)) {
12299                         path.slots[0]++;
12300                         continue;
12301                 }
12302
12303                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12304                                     key.offset);
12305                 if (ret)
12306                         break;
12307                 path.slots[0]++;
12308         }
12309
12310         btrfs_release_path(&path);
12311         free(buf);
12312         return ret;
12313 }
12314
/*
 * Recalculate the checksums and insert them into the csum tree.
 *
 * An extent tree init wipes out all extent info, so in that case the extent
 * tree cannot be used as the source.  A non-zero @search_fs_tree selects the
 * fs/subvolume trees as the source instead of the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
12331
12332 static void free_roots_info_cache(void)
12333 {
12334         if (!roots_info_cache)
12335                 return;
12336
12337         while (!cache_tree_empty(roots_info_cache)) {
12338                 struct cache_extent *entry;
12339                 struct root_item_info *rii;
12340
12341                 entry = first_cache_extent(roots_info_cache);
12342                 if (!entry)
12343                         break;
12344                 remove_cache_extent(roots_info_cache, entry);
12345                 rii = container_of(entry, struct root_item_info, cache_extent);
12346                 free(rii);
12347         }
12348
12349         free(roots_info_cache);
12350         roots_info_cache = NULL;
12351 }
12352
12353 static int build_roots_info_cache(struct btrfs_fs_info *info)
12354 {
12355         int ret = 0;
12356         struct btrfs_key key;
12357         struct extent_buffer *leaf;
12358         struct btrfs_path path;
12359
12360         if (!roots_info_cache) {
12361                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12362                 if (!roots_info_cache)
12363                         return -ENOMEM;
12364                 cache_tree_init(roots_info_cache);
12365         }
12366
12367         btrfs_init_path(&path);
12368         key.objectid = 0;
12369         key.type = BTRFS_EXTENT_ITEM_KEY;
12370         key.offset = 0;
12371         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12372         if (ret < 0)
12373                 goto out;
12374         leaf = path.nodes[0];
12375
12376         while (1) {
12377                 struct btrfs_key found_key;
12378                 struct btrfs_extent_item *ei;
12379                 struct btrfs_extent_inline_ref *iref;
12380                 int slot = path.slots[0];
12381                 int type;
12382                 u64 flags;
12383                 u64 root_id;
12384                 u8 level;
12385                 struct cache_extent *entry;
12386                 struct root_item_info *rii;
12387
12388                 if (slot >= btrfs_header_nritems(leaf)) {
12389                         ret = btrfs_next_leaf(info->extent_root, &path);
12390                         if (ret < 0) {
12391                                 break;
12392                         } else if (ret) {
12393                                 ret = 0;
12394                                 break;
12395                         }
12396                         leaf = path.nodes[0];
12397                         slot = path.slots[0];
12398                 }
12399
12400                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12401
12402                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12403                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12404                         goto next;
12405
12406                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12407                 flags = btrfs_extent_flags(leaf, ei);
12408
12409                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12410                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12411                         goto next;
12412
12413                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12414                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12415                         level = found_key.offset;
12416                 } else {
12417                         struct btrfs_tree_block_info *binfo;
12418
12419                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12420                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12421                         level = btrfs_tree_block_level(leaf, binfo);
12422                 }
12423
12424                 /*
12425                  * For a root extent, it must be of the following type and the
12426                  * first (and only one) iref in the item.
12427                  */
12428                 type = btrfs_extent_inline_ref_type(leaf, iref);
12429                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12430                         goto next;
12431
12432                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12433                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12434                 if (!entry) {
12435                         rii = malloc(sizeof(struct root_item_info));
12436                         if (!rii) {
12437                                 ret = -ENOMEM;
12438                                 goto out;
12439                         }
12440                         rii->cache_extent.start = root_id;
12441                         rii->cache_extent.size = 1;
12442                         rii->level = (u8)-1;
12443                         entry = &rii->cache_extent;
12444                         ret = insert_cache_extent(roots_info_cache, entry);
12445                         ASSERT(ret == 0);
12446                 } else {
12447                         rii = container_of(entry, struct root_item_info,
12448                                            cache_extent);
12449                 }
12450
12451                 ASSERT(rii->cache_extent.start == root_id);
12452                 ASSERT(rii->cache_extent.size == 1);
12453
12454                 if (level > rii->level || rii->level == (u8)-1) {
12455                         rii->level = level;
12456                         rii->bytenr = found_key.objectid;
12457                         rii->gen = btrfs_extent_generation(leaf, ei);
12458                         rii->node_count = 1;
12459                 } else if (level == rii->level) {
12460                         rii->node_count++;
12461                 }
12462 next:
12463                 path.slots[0]++;
12464         }
12465
12466 out:
12467         btrfs_release_path(&path);
12468
12469         return ret;
12470 }
12471
12472 static int maybe_repair_root_item(struct btrfs_path *path,
12473                                   const struct btrfs_key *root_key,
12474                                   const int read_only_mode)
12475 {
12476         const u64 root_id = root_key->objectid;
12477         struct cache_extent *entry;
12478         struct root_item_info *rii;
12479         struct btrfs_root_item ri;
12480         unsigned long offset;
12481
12482         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12483         if (!entry) {
12484                 fprintf(stderr,
12485                         "Error: could not find extent items for root %llu\n",
12486                         root_key->objectid);
12487                 return -ENOENT;
12488         }
12489
12490         rii = container_of(entry, struct root_item_info, cache_extent);
12491         ASSERT(rii->cache_extent.start == root_id);
12492         ASSERT(rii->cache_extent.size == 1);
12493
12494         if (rii->node_count != 1) {
12495                 fprintf(stderr,
12496                         "Error: could not find btree root extent for root %llu\n",
12497                         root_id);
12498                 return -ENOENT;
12499         }
12500
12501         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12502         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12503
12504         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12505             btrfs_root_level(&ri) != rii->level ||
12506             btrfs_root_generation(&ri) != rii->gen) {
12507
12508                 /*
12509                  * If we're in repair mode but our caller told us to not update
12510                  * the root item, i.e. just check if it needs to be updated, don't
12511                  * print this message, since the caller will call us again shortly
12512                  * for the same root item without read only mode (the caller will
12513                  * open a transaction first).
12514                  */
12515                 if (!(read_only_mode && repair))
12516                         fprintf(stderr,
12517                                 "%sroot item for root %llu,"
12518                                 " current bytenr %llu, current gen %llu, current level %u,"
12519                                 " new bytenr %llu, new gen %llu, new level %u\n",
12520                                 (read_only_mode ? "" : "fixing "),
12521                                 root_id,
12522                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12523                                 btrfs_root_level(&ri),
12524                                 rii->bytenr, rii->gen, rii->level);
12525
12526                 if (btrfs_root_generation(&ri) > rii->gen) {
12527                         fprintf(stderr,
12528                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12529                                 root_id, btrfs_root_generation(&ri), rii->gen);
12530                         return -EINVAL;
12531                 }
12532
12533                 if (!read_only_mode) {
12534                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12535                         btrfs_set_root_level(&ri, rii->level);
12536                         btrfs_set_root_generation(&ri, rii->gen);
12537                         write_extent_buffer(path->nodes[0], &ri,
12538                                             offset, sizeof(ri));
12539                 }
12540
12541                 return 1;
12542         }
12543
12544         return 0;
12545 }
12546
12547 /*
12548  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12549  * caused read-only snapshots to be corrupted if they were created at a moment
12550  * when the source subvolume/snapshot had orphan items. The issue was that the
12551  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12552  * node instead of the post orphan cleanup root node.
12553  * So this function, and its callees, just detects and fixes those cases. Even
12554  * though the regression was for read-only snapshots, this function applies to
12555  * any snapshot/subvolume root.
12556  * This must be run before any other repair code - not doing it so, makes other
12557  * repair code delete or modify backrefs in the extent tree for example, which
12558  * will result in an inconsistent fs after repairing the root items.
12559  */
12560 static int repair_root_items(struct btrfs_fs_info *info)
12561 {
12562         struct btrfs_path path;
12563         struct btrfs_key key;
12564         struct extent_buffer *leaf;
12565         struct btrfs_trans_handle *trans = NULL;
12566         int ret = 0;
12567         int bad_roots = 0;
12568         int need_trans = 0;
12569
12570         btrfs_init_path(&path);
12571
12572         ret = build_roots_info_cache(info);
12573         if (ret)
12574                 goto out;
12575
12576         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12577         key.type = BTRFS_ROOT_ITEM_KEY;
12578         key.offset = 0;
12579
12580 again:
12581         /*
12582          * Avoid opening and committing transactions if a leaf doesn't have
12583          * any root items that need to be fixed, so that we avoid rotating
12584          * backup roots unnecessarily.
12585          */
12586         if (need_trans) {
12587                 trans = btrfs_start_transaction(info->tree_root, 1);
12588                 if (IS_ERR(trans)) {
12589                         ret = PTR_ERR(trans);
12590                         goto out;
12591                 }
12592         }
12593
12594         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12595                                 0, trans ? 1 : 0);
12596         if (ret < 0)
12597                 goto out;
12598         leaf = path.nodes[0];
12599
12600         while (1) {
12601                 struct btrfs_key found_key;
12602
12603                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12604                         int no_more_keys = find_next_key(&path, &key);
12605
12606                         btrfs_release_path(&path);
12607                         if (trans) {
12608                                 ret = btrfs_commit_transaction(trans,
12609                                                                info->tree_root);
12610                                 trans = NULL;
12611                                 if (ret < 0)
12612                                         goto out;
12613                         }
12614                         need_trans = 0;
12615                         if (no_more_keys)
12616                                 break;
12617                         goto again;
12618                 }
12619
12620                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12621
12622                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12623                         goto next;
12624                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12625                         goto next;
12626
12627                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12628                 if (ret < 0)
12629                         goto out;
12630                 if (ret) {
12631                         if (!trans && repair) {
12632                                 need_trans = 1;
12633                                 key = found_key;
12634                                 btrfs_release_path(&path);
12635                                 goto again;
12636                         }
12637                         bad_roots++;
12638                 }
12639 next:
12640                 path.slots[0]++;
12641         }
12642         ret = 0;
12643 out:
12644         free_roots_info_cache();
12645         btrfs_release_path(&path);
12646         if (trans)
12647                 btrfs_commit_transaction(trans, info->tree_root);
12648         if (ret < 0)
12649                 return ret;
12650
12651         return bad_roots;
12652 }
12653
12654 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12655 {
12656         struct btrfs_trans_handle *trans;
12657         struct btrfs_block_group_cache *bg_cache;
12658         u64 current = 0;
12659         int ret = 0;
12660
12661         /* Clear all free space cache inodes and its extent data */
12662         while (1) {
12663                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12664                 if (!bg_cache)
12665                         break;
12666                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12667                 if (ret < 0)
12668                         return ret;
12669                 current = bg_cache->key.objectid + bg_cache->key.offset;
12670         }
12671
12672         /* Don't forget to set cache_generation to -1 */
12673         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12674         if (IS_ERR(trans)) {
12675                 error("failed to update super block cache generation");
12676                 return PTR_ERR(trans);
12677         }
12678         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12679         btrfs_commit_transaction(trans, fs_info->tree_root);
12680
12681         return ret;
12682 }
12683
12684 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12685                 int clear_version)
12686 {
12687         int ret = 0;
12688
12689         if (clear_version == 1) {
12690                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12691                         error(
12692                 "free space cache v2 detected, use --clear-space-cache v2");
12693                         ret = 1;
12694                         goto close_out;
12695                 }
12696                 printf("Clearing free space cache\n");
12697                 ret = clear_free_space_cache(fs_info);
12698                 if (ret) {
12699                         error("failed to clear free space cache");
12700                         ret = 1;
12701                 } else {
12702                         printf("Free space cache cleared\n");
12703                 }
12704         } else if (clear_version == 2) {
12705                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12706                         printf("no free space cache v2 to clear\n");
12707                         ret = 0;
12708                         goto close_out;
12709                 }
12710                 printf("Clear free space cache v2\n");
12711                 ret = btrfs_clear_free_space_tree(fs_info);
12712                 if (ret) {
12713                         error("failed to clear free space cache v2: %d", ret);
12714                         ret = 1;
12715                 } else {
12716                         printf("free space cache v2 cleared\n");
12717                 }
12718         }
12719 close_out:
12720         return ret;
12721 }
12722
/*
 * Usage text for "btrfs check", one string per entry, terminated by NULL.
 * The leading entries are the synopsis and descriptive text; after the empty
 * string come the option descriptions.
 */
const char * const cmd_check_usage[] = {
        /* synopsis and description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        /* options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
12754
12755 int cmd_check(int argc, char **argv)
12756 {
12757         struct cache_tree root_cache;
12758         struct btrfs_root *root;
12759         struct btrfs_fs_info *info;
12760         u64 bytenr = 0;
12761         u64 subvolid = 0;
12762         u64 tree_root_bytenr = 0;
12763         u64 chunk_root_bytenr = 0;
12764         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12765         int ret = 0;
12766         int err = 0;
12767         u64 num;
12768         int init_csum_tree = 0;
12769         int readonly = 0;
12770         int clear_space_cache = 0;
12771         int qgroup_report = 0;
12772         int qgroups_repaired = 0;
12773         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12774         int force = 0;
12775
12776         while(1) {
12777                 int c;
12778                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12779                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12780                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12781                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
12782                         GETOPT_VAL_FORCE };
12783                 static const struct option long_options[] = {
12784                         { "super", required_argument, NULL, 's' },
12785                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12786                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12787                         { "init-csum-tree", no_argument, NULL,
12788                                 GETOPT_VAL_INIT_CSUM },
12789                         { "init-extent-tree", no_argument, NULL,
12790                                 GETOPT_VAL_INIT_EXTENT },
12791                         { "check-data-csum", no_argument, NULL,
12792                                 GETOPT_VAL_CHECK_CSUM },
12793                         { "backup", no_argument, NULL, 'b' },
12794                         { "subvol-extents", required_argument, NULL, 'E' },
12795                         { "qgroup-report", no_argument, NULL, 'Q' },
12796                         { "tree-root", required_argument, NULL, 'r' },
12797                         { "chunk-root", required_argument, NULL,
12798                                 GETOPT_VAL_CHUNK_TREE },
12799                         { "progress", no_argument, NULL, 'p' },
12800                         { "mode", required_argument, NULL,
12801                                 GETOPT_VAL_MODE },
12802                         { "clear-space-cache", required_argument, NULL,
12803                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12804                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
12805                         { NULL, 0, NULL, 0}
12806                 };
12807
12808                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12809                 if (c < 0)
12810                         break;
12811                 switch(c) {
12812                         case 'a': /* ignored */ break;
12813                         case 'b':
12814                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12815                                 break;
12816                         case 's':
12817                                 num = arg_strtou64(optarg);
12818                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12819                                         error(
12820                                         "super mirror should be less than %d",
12821                                                 BTRFS_SUPER_MIRROR_MAX);
12822                                         exit(1);
12823                                 }
12824                                 bytenr = btrfs_sb_offset(((int)num));
12825                                 printf("using SB copy %llu, bytenr %llu\n", num,
12826                                        (unsigned long long)bytenr);
12827                                 break;
12828                         case 'Q':
12829                                 qgroup_report = 1;
12830                                 break;
12831                         case 'E':
12832                                 subvolid = arg_strtou64(optarg);
12833                                 break;
12834                         case 'r':
12835                                 tree_root_bytenr = arg_strtou64(optarg);
12836                                 break;
12837                         case GETOPT_VAL_CHUNK_TREE:
12838                                 chunk_root_bytenr = arg_strtou64(optarg);
12839                                 break;
12840                         case 'p':
12841                                 ctx.progress_enabled = true;
12842                                 break;
12843                         case '?':
12844                         case 'h':
12845                                 usage(cmd_check_usage);
12846                         case GETOPT_VAL_REPAIR:
12847                                 printf("enabling repair mode\n");
12848                                 repair = 1;
12849                                 ctree_flags |= OPEN_CTREE_WRITES;
12850                                 break;
12851                         case GETOPT_VAL_READONLY:
12852                                 readonly = 1;
12853                                 break;
12854                         case GETOPT_VAL_INIT_CSUM:
12855                                 printf("Creating a new CRC tree\n");
12856                                 init_csum_tree = 1;
12857                                 repair = 1;
12858                                 ctree_flags |= OPEN_CTREE_WRITES;
12859                                 break;
12860                         case GETOPT_VAL_INIT_EXTENT:
12861                                 init_extent_tree = 1;
12862                                 ctree_flags |= (OPEN_CTREE_WRITES |
12863                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12864                                 repair = 1;
12865                                 break;
12866                         case GETOPT_VAL_CHECK_CSUM:
12867                                 check_data_csum = 1;
12868                                 break;
12869                         case GETOPT_VAL_MODE:
12870                                 check_mode = parse_check_mode(optarg);
12871                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12872                                         error("unknown mode: %s", optarg);
12873                                         exit(1);
12874                                 }
12875                                 break;
12876                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12877                                 if (strcmp(optarg, "v1") == 0) {
12878                                         clear_space_cache = 1;
12879                                 } else if (strcmp(optarg, "v2") == 0) {
12880                                         clear_space_cache = 2;
12881                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12882                                 } else {
12883                                         error(
12884                 "invalid argument to --clear-space-cache, must be v1 or v2");
12885                                         exit(1);
12886                                 }
12887                                 ctree_flags |= OPEN_CTREE_WRITES;
12888                                 break;
12889                         case GETOPT_VAL_FORCE:
12890                                 force = 1;
12891                                 break;
12892                 }
12893         }
12894
12895         if (check_argc_exact(argc - optind, 1))
12896                 usage(cmd_check_usage);
12897
12898         if (ctx.progress_enabled) {
12899                 ctx.tp = TASK_NOTHING;
12900                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12901         }
12902
12903         /* This check is the only reason for --readonly to exist */
12904         if (readonly && repair) {
12905                 error("repair options are not compatible with --readonly");
12906                 exit(1);
12907         }
12908
12909         /*
12910          * Not supported yet
12911          */
12912         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12913                 error("low memory mode doesn't support repair yet");
12914                 exit(1);
12915         }
12916
12917         radix_tree_init();
12918         cache_tree_init(&root_cache);
12919
12920         ret = check_mounted(argv[optind]);
12921         if (!force) {
12922                 if (ret < 0) {
12923                         error("could not check mount status: %s",
12924                                         strerror(-ret));
12925                         err |= !!ret;
12926                         goto err_out;
12927                 } else if (ret) {
12928                         error(
12929 "%s is currently mounted, use --force if you really intend to check the filesystem",
12930                                 argv[optind]);
12931                         ret = -EBUSY;
12932                         err |= !!ret;
12933                         goto err_out;
12934                 }
12935         } else {
12936                 if (repair) {
12937                         error("repair and --force is not yet supported");
12938                         ret = 1;
12939                         err |= !!ret;
12940                         goto err_out;
12941                 }
12942                 if (ret < 0) {
12943                         warning(
12944 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
12945                                 argv[optind]);
12946                 } else if (ret) {
12947                         warning(
12948                         "filesystem mounted, continuing because of --force");
12949                 }
12950         }
12951
12952         /* only allow partial opening under repair mode */
12953         if (repair)
12954                 ctree_flags |= OPEN_CTREE_PARTIAL;
12955
12956         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12957                                   chunk_root_bytenr, ctree_flags);
12958         if (!info) {
12959                 error("cannot open file system");
12960                 ret = -EIO;
12961                 err |= !!ret;
12962                 goto err_out;
12963         }
12964
12965         global_info = info;
12966         root = info->fs_root;
12967         uuid_unparse(info->super_copy->fsid, uuidbuf);
12968
12969         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12970
12971         /*
12972          * Check the bare minimum before starting anything else that could rely
12973          * on it, namely the tree roots, any local consistency checks
12974          */
12975         if (!extent_buffer_uptodate(info->tree_root->node) ||
12976             !extent_buffer_uptodate(info->dev_root->node) ||
12977             !extent_buffer_uptodate(info->chunk_root->node)) {
12978                 error("critical roots corrupted, unable to check the filesystem");
12979                 err |= !!ret;
12980                 ret = -EIO;
12981                 goto close_out;
12982         }
12983
12984         if (clear_space_cache) {
12985                 ret = do_clear_free_space_cache(info, clear_space_cache);
12986                 err |= !!ret;
12987                 goto close_out;
12988         }
12989
12990         /*
12991          * repair mode will force us to commit transaction which
12992          * will make us fail to load log tree when mounting.
12993          */
12994         if (repair && btrfs_super_log_root(info->super_copy)) {
12995                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12996                 if (!ret) {
12997                         ret = 1;
12998                         err |= !!ret;
12999                         goto close_out;
13000                 }
13001                 ret = zero_log_tree(root);
13002                 err |= !!ret;
13003                 if (ret) {
13004                         error("failed to zero log tree: %d", ret);
13005                         goto close_out;
13006                 }
13007         }
13008
13009         if (qgroup_report) {
13010                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13011                        uuidbuf);
13012                 ret = qgroup_verify_all(info);
13013                 err |= !!ret;
13014                 if (ret == 0)
13015                         report_qgroups(1);
13016                 goto close_out;
13017         }
13018         if (subvolid) {
13019                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13020                        subvolid, argv[optind], uuidbuf);
13021                 ret = print_extent_state(info, subvolid);
13022                 err |= !!ret;
13023                 goto close_out;
13024         }
13025
13026         if (init_extent_tree || init_csum_tree) {
13027                 struct btrfs_trans_handle *trans;
13028
13029                 trans = btrfs_start_transaction(info->extent_root, 0);
13030                 if (IS_ERR(trans)) {
13031                         error("error starting transaction");
13032                         ret = PTR_ERR(trans);
13033                         err |= !!ret;
13034                         goto close_out;
13035                 }
13036
13037                 if (init_extent_tree) {
13038                         printf("Creating a new extent tree\n");
13039                         ret = reinit_extent_tree(trans, info);
13040                         err |= !!ret;
13041                         if (ret)
13042                                 goto close_out;
13043                 }
13044
13045                 if (init_csum_tree) {
13046                         printf("Reinitialize checksum tree\n");
13047                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13048                         if (ret) {
13049                                 error("checksum tree initialization failed: %d",
13050                                                 ret);
13051                                 ret = -EIO;
13052                                 err |= !!ret;
13053                                 goto close_out;
13054                         }
13055
13056                         ret = fill_csum_tree(trans, info->csum_root,
13057                                              init_extent_tree);
13058                         err |= !!ret;
13059                         if (ret) {
13060                                 error("checksum tree refilling failed: %d", ret);
13061                                 return -EIO;
13062                         }
13063                 }
13064                 /*
13065                  * Ok now we commit and run the normal fsck, which will add
13066                  * extent entries for all of the items it finds.
13067                  */
13068                 ret = btrfs_commit_transaction(trans, info->extent_root);
13069                 err |= !!ret;
13070                 if (ret)
13071                         goto close_out;
13072         }
13073         if (!extent_buffer_uptodate(info->extent_root->node)) {
13074                 error("critical: extent_root, unable to check the filesystem");
13075                 ret = -EIO;
13076                 err |= !!ret;
13077                 goto close_out;
13078         }
13079         if (!extent_buffer_uptodate(info->csum_root->node)) {
13080                 error("critical: csum_root, unable to check the filesystem");
13081                 ret = -EIO;
13082                 err |= !!ret;
13083                 goto close_out;
13084         }
13085
13086         ret = do_check_chunks_and_extents(info);
13087         err |= !!ret;
13088         if (ret)
13089                 error(
13090                 "errors found in extent allocation tree or chunk allocation");
13091
13092         ret = repair_root_items(info);
13093         err |= !!ret;
13094         if (ret < 0) {
13095                 error("failed to repair root items: %s", strerror(-ret));
13096                 goto close_out;
13097         }
13098         if (repair) {
13099                 fprintf(stderr, "Fixed %d roots.\n", ret);
13100                 ret = 0;
13101         } else if (ret > 0) {
13102                 fprintf(stderr,
13103                        "Found %d roots with an outdated root item.\n",
13104                        ret);
13105                 fprintf(stderr,
13106                         "Please run a filesystem check with the option --repair to fix them.\n");
13107                 ret = 1;
13108                 err |= !!ret;
13109                 goto close_out;
13110         }
13111
13112         if (!ctx.progress_enabled) {
13113                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13114                         fprintf(stderr, "checking free space tree\n");
13115                 else
13116                         fprintf(stderr, "checking free space cache\n");
13117         }
13118         ret = check_space_cache(root);
13119         err |= !!ret;
13120         if (ret) {
13121                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13122                         error("errors found in free space tree");
13123                 else
13124                         error("errors found in free space cache");
13125                 goto out;
13126         }
13127
13128         /*
13129          * We used to have to have these hole extents in between our real
13130          * extents so if we don't have this flag set we need to make sure there
13131          * are no gaps in the file extents for inodes, otherwise we can just
13132          * ignore it when this happens.
13133          */
13134         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13135         ret = do_check_fs_roots(info, &root_cache);
13136         err |= !!ret;
13137         if (ret) {
13138                 error("errors found in fs roots");
13139                 goto out;
13140         }
13141
13142         fprintf(stderr, "checking csums\n");
13143         ret = check_csums(root);
13144         err |= !!ret;
13145         if (ret) {
13146                 error("errors found in csum tree");
13147                 goto out;
13148         }
13149
13150         fprintf(stderr, "checking root refs\n");
13151         /* For low memory mode, check_fs_roots_v2 handles root refs */
13152         if (check_mode != CHECK_MODE_LOWMEM) {
13153                 ret = check_root_refs(root, &root_cache);
13154                 err |= !!ret;
13155                 if (ret) {
13156                         error("errors found in root refs");
13157                         goto out;
13158                 }
13159         }
13160
13161         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13162                 struct extent_buffer *eb;
13163
13164                 eb = list_first_entry(&root->fs_info->recow_ebs,
13165                                       struct extent_buffer, recow);
13166                 list_del_init(&eb->recow);
13167                 ret = recow_extent_buffer(root, eb);
13168                 err |= !!ret;
13169                 if (ret) {
13170                         error("fails to fix transid errors");
13171                         break;
13172                 }
13173         }
13174
13175         while (!list_empty(&delete_items)) {
13176                 struct bad_item *bad;
13177
13178                 bad = list_first_entry(&delete_items, struct bad_item, list);
13179                 list_del_init(&bad->list);
13180                 if (repair) {
13181                         ret = delete_bad_item(root, bad);
13182                         err |= !!ret;
13183                 }
13184                 free(bad);
13185         }
13186
13187         if (info->quota_enabled) {
13188                 fprintf(stderr, "checking quota groups\n");
13189                 ret = qgroup_verify_all(info);
13190                 err |= !!ret;
13191                 if (ret) {
13192                         error("failed to check quota groups");
13193                         goto out;
13194                 }
13195                 report_qgroups(0);
13196                 ret = repair_qgroups(info, &qgroups_repaired);
13197                 err |= !!ret;
13198                 if (err) {
13199                         error("failed to repair quota groups");
13200                         goto out;
13201                 }
13202                 ret = 0;
13203         }
13204
13205         if (!list_empty(&root->fs_info->recow_ebs)) {
13206                 error("transid errors in file system");
13207                 ret = 1;
13208                 err |= !!ret;
13209         }
13210 out:
13211         printf("found %llu bytes used, ",
13212                (unsigned long long)bytes_used);
13213         if (err)
13214                 printf("error(s) found\n");
13215         else
13216                 printf("no error found\n");
13217         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13218         printf("total tree bytes: %llu\n",
13219                (unsigned long long)total_btree_bytes);
13220         printf("total fs tree bytes: %llu\n",
13221                (unsigned long long)total_fs_tree_bytes);
13222         printf("total extent tree bytes: %llu\n",
13223                (unsigned long long)total_extent_tree_bytes);
13224         printf("btree space waste bytes: %llu\n",
13225                (unsigned long long)btree_space_waste);
13226         printf("file data blocks allocated: %llu\n referenced %llu\n",
13227                 (unsigned long long)data_bytes_allocated,
13228                 (unsigned long long)data_bytes_referenced);
13229
13230         free_qgroup_counts();
13231         free_root_recs_tree(&root_cache);
13232 close_out:
13233         close_ctree(root);
13234 err_out:
13235         if (ctx.progress_enabled)
13236                 task_deinit(ctx.info);
13237
13238         return err;
13239 }