btrfs-progs: check: introduce low memory mode
[platform/upstream/btrfs-progs.git] / cmds-check.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases reported by the progress indicator.  The first three values are
 * used by print_status_check() as indexes into its message table;
 * TASK_NOTHING makes that function return without printing anything.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};
51
/*
 * Context handed (as a void pointer) to print_status_check().
 */
struct task_ctx {
	/* NOTE(review): not referenced in this part of the file */
	int progress_enabled;
	/* Phase whose message print_status_check() prints */
	enum task_position tp;

	/* Passed to task_period_start() and task_period_wait() */
	struct task_info *info;
};
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static int low_memory = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Common part of a backref record.  It is embedded as the 'node' member of
 * struct data_backref and struct tree_backref and carries the rb_node that
 * the comparators in this file receive.
 */
struct extent_backref {
	struct rb_node node;
	/* 1 for a data_backref, 0 for a tree_backref (see the WARN_ONs below) */
	unsigned int is_data:1;
	unsigned int found_extent_tree:1;
	/* Compared by compare_extent_backref() right after is_data */
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;
};
86
87 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 {
89         return rb_entry(node, struct extent_backref, node);
90 }
91
/*
 * Backref record for a data extent; embeds the common extent_backref as
 * 'node' (see to_data_backref()).  compare_data_backref() orders records by
 * parent/root, owner, offset and bytes and, when both records have a
 * non-zero found_ref, by disk_bytenr and found_ref as well.
 */
struct data_backref {
	struct extent_backref node;
	union {
		/* parent and root share storage */
		u64 parent;
		u64 root;
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	/* A counter, distinct from the one-bit node.found_ref flag */
	u32 found_ref;
};
106
107 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 {
109         return container_of(back, struct data_backref, node);
110 }
111
112 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 {
114         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
115         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
116         struct data_backref *back1 = to_data_backref(ext1);
117         struct data_backref *back2 = to_data_backref(ext2);
118
119         WARN_ON(!ext1->is_data);
120         WARN_ON(!ext2->is_data);
121
122         /* parent and root are a union, so this covers both */
123         if (back1->parent > back2->parent)
124                 return 1;
125         if (back1->parent < back2->parent)
126                 return -1;
127
128         /* This is a full backref and the parents match. */
129         if (back1->node.full_backref)
130                 return 0;
131
132         if (back1->owner > back2->owner)
133                 return 1;
134         if (back1->owner < back2->owner)
135                 return -1;
136
137         if (back1->offset > back2->offset)
138                 return 1;
139         if (back1->offset < back2->offset)
140                 return -1;
141
142         if (back1->bytes > back2->bytes)
143                 return 1;
144         if (back1->bytes < back2->bytes)
145                 return -1;
146
147         if (back1->found_ref && back2->found_ref) {
148                 if (back1->disk_bytenr > back2->disk_bytenr)
149                         return 1;
150                 if (back1->disk_bytenr < back2->disk_bytenr)
151                         return -1;
152
153                 if (back1->found_ref > back2->found_ref)
154                         return 1;
155                 if (back1->found_ref < back2->found_ref)
156                         return -1;
157         }
158
159         return 0;
160 }
161
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead of an rb_node.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_record::orphan_extents.
 */
struct orphan_data_extent {
	struct list_head list;
	u64 root;
	/* Printed as "inode" by print_orphan_data_extents() */
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};
176
/*
 * Backref record for a tree block; embeds the common extent_backref as
 * 'node' (see to_tree_backref()).  compare_tree_backref() orders records by
 * the parent/root value only.
 */
struct tree_backref {
	struct extent_backref node;
	union {
		/* parent and root share storage */
		u64 parent;
		u64 root;
	};
};
184
185 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 {
187         return container_of(back, struct tree_backref, node);
188 }
189
190 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 {
192         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
193         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
194         struct tree_backref *back1 = to_tree_backref(ext1);
195         struct tree_backref *back2 = to_tree_backref(ext2);
196
197         WARN_ON(ext1->is_data);
198         WARN_ON(ext2->is_data);
199
200         /* parent and root are a union, so this covers both */
201         if (back1->parent > back2->parent)
202                 return 1;
203         if (back1->parent < back2->parent)
204                 return -1;
205
206         return 0;
207 }
208
209 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 {
211         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
212         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213
214         if (ext1->is_data > ext2->is_data)
215                 return 1;
216
217         if (ext1->is_data < ext2->is_data)
218                 return -1;
219
220         if (ext1->full_backref > ext2->full_backref)
221                 return 1;
222         if (ext1->full_backref < ext2->full_backref)
223                 return -1;
224
225         if (ext1->is_data)
226                 return compare_data_backref(node1, node2);
227         else
228                 return compare_tree_backref(node1, node2);
229 }
230
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is two bits wide, so it can hold this value in addition to
 * 0 and 1.
 */
enum { FLAG_UNSET = 2 };
233
/*
 * Per-extent bookkeeping record.
 * NOTE(review): the code that fills and consumes this record is outside the
 * visible part of the file; only locally verifiable facts are noted below.
 */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	struct rb_root backref_tree;
	/* to_extent_record() maps a pointer to this member back to the record */
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;
	u64 extent_item_refs;
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	/* Two bits wide so that it can also hold FLAG_UNSET (2) */
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};
261
262 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 {
264         return container_of(entry, struct extent_record, list);
265 }
266
267 struct inode_backref {
268         struct list_head list;
269         unsigned int found_dir_item:1;
270         unsigned int found_dir_index:1;
271         unsigned int found_inode_ref:1;
272         unsigned int filetype:8;
273         int errors;
274         unsigned int ref_type;
275         u64 dir;
276         u64 index;
277         u16 namelen;
278         char name[0];
279 };
280
281 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 {
283         return list_entry(entry, struct inode_backref, list);
284 }
285
/*
 * List-linked record describing one root item.
 * NOTE(review): producers and consumers are outside the visible part of the
 * file; field meanings below are not documented here for that reason.
 */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	int level_size;
	struct btrfs_key drop_key;
};
296
/*
 * Error bits for name/root references.  print_ref_error() decodes a mask of
 * these bits into text, one message per set bit.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
310
/*
 * One hole in a file, covering the byte range [start, start + len).
 * Holes are kept in an rb-tree (inode_record::holes) that is maintained by
 * add_file_extent_hole() and del_file_extent_hole().
 */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};
316
/*
 * Everything collected about one inode while checking a tree.
 *
 * Records are looked up and created by get_inode_rec() and duplicated by
 * clone_inode_rec(), which deep-copies the backrefs list, the
 * orphan_extents list and the holes tree.
 */
struct inode_record {
	/* List of struct inode_backref */
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	/* Mask of I_ERR_* bits, decoded by print_inode_error() */
	int errors;

	u64 ino;
	u32 nlink;
	u32 imode;
	u64 isize;
	u64 nbytes;

	u32 found_link;
	u64 found_size;
	/* Set to (u64)-1 when get_inode_rec() creates the record */
	u64 extent_start;
	u64 extent_end;
	/* rb-tree of struct file_extent_hole */
	struct rb_root holes;
	/* List of struct orphan_data_extent */
	struct list_head orphan_extents;

	/* get_inode_rec() clones the record before modifying it if refs > 1 */
	u32 refs;
};
344
/*
 * Error bits stored in inode_record::errors.  print_inode_error() decodes
 * the mask into text, one message per set bit.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
360
361 struct root_backref {
362         struct list_head list;
363         unsigned int found_dir_item:1;
364         unsigned int found_dir_index:1;
365         unsigned int found_back_ref:1;
366         unsigned int found_forward_ref:1;
367         unsigned int reachable:1;
368         int errors;
369         u64 ref_root;
370         u64 dir;
371         u64 index;
372         u16 namelen;
373         char name[0];
374 };
375
376 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 {
378         return list_entry(entry, struct root_backref, list);
379 }
380
/*
 * Per-root record, embeddable in a cache tree through 'cache'.
 * NOTE(review): users are outside the visible part of the file.
 */
struct root_record {
	/* NOTE(review): presumably a list of struct root_backref - confirm */
	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};
388
/*
 * Cache-tree entry that carries an opaque pointer.  get_inode_rec() uses it
 * to store a struct inode_record under the key cache.start = inode number,
 * cache.size = 1.
 */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};
393
/*
 * Cache-tree entry holding its own root and inode caches.
 * NOTE(review): users are outside the visible part of the file; the name
 * suggests per-shared-tree-block state - confirm.
 */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;
	u32 refs;
};
401
/*
 * A start/size pair.
 * NOTE(review): presumably the byte range of a block - users are outside
 * the visible part of the file.
 */
struct block_info {
	u64 start;
	u32 size;
};
406
/*
 * State of a tree walk: a cache tree plus one shared_node slot per possible
 * tree level (BTRFS_MAX_LEVEL entries).
 * NOTE(review): users are outside the visible part of the file.
 */
struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	int active_node;
	int root_level;
};
413
/*
 * List-linked (key, root id) pair.
 * NOTE(review): presumably identifies an item found to be bad - users are
 * outside the visible part of the file.
 */
struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};
419
/*
 * List-linked (bytenr, bytes) candidate with a counter and a broken flag.
 * NOTE(review): users are outside the visible part of the file.
 */
struct extent_entry {
	u64 bytenr;
	u64 bytes;
	int count;
	int broken;
	struct list_head list;
};
427
/*
 * Information gathered about one root, embeddable in a cache tree through
 * 'cache_extent'.
 */
struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	u64 bytenr;
	u64 gen;
	struct cache_extent cache_extent;
};
437
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit, so a mask built by OR-ing several of them
 * can still be decoded unambiguously.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
454
455 static void *print_status_check(void *p)
456 {
457         struct task_ctx *priv = p;
458         const char work_indicator[] = { '.', 'o', 'O', 'o' };
459         uint32_t count = 0;
460         static char *task_position_string[] = {
461                 "checking extents",
462                 "checking free space cache",
463                 "checking fs roots",
464         };
465
466         task_period_start(priv->info, 1000 /* 1s */);
467
468         if (priv->tp == TASK_NOTHING)
469                 return NULL;
470
471         while (1) {
472                 printf("%s [%c]\r", task_position_string[priv->tp],
473                                 work_indicator[count % 4]);
474                 count++;
475                 fflush(stdout);
476                 task_period_wait(priv->info);
477         }
478         return NULL;
479 }
480
/*
 * Terminate the progress line: emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	printf("\n");
	fflush(stdout);
	return 0;
}
488
489 /* Compatible function to allow reuse of old codes */
490 static u64 first_extent_gap(struct rb_root *holes)
491 {
492         struct file_extent_hole *hole;
493
494         if (RB_EMPTY_ROOT(holes))
495                 return (u64)-1;
496
497         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
498         return hole->start;
499 }
500
501 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
502 {
503         struct file_extent_hole *hole1;
504         struct file_extent_hole *hole2;
505
506         hole1 = rb_entry(node1, struct file_extent_hole, node);
507         hole2 = rb_entry(node2, struct file_extent_hole, node);
508
509         if (hole1->start > hole2->start)
510                 return -1;
511         if (hole1->start < hole2->start)
512                 return 1;
513         /* Now hole1->start == hole2->start */
514         if (hole1->len >= hole2->len)
515                 /*
516                  * Hole 1 will be merge center
517                  * Same hole will be merged later
518                  */
519                 return -1;
520         /* Hole 2 will be merge center */
521         return 1;
522 }
523
524 /*
525  * Add a hole to the record
526  *
527  * This will do hole merge for copy_file_extent_holes(),
528  * which will ensure there won't be continuous holes.
529  */
530 static int add_file_extent_hole(struct rb_root *holes,
531                                 u64 start, u64 len)
532 {
533         struct file_extent_hole *hole;
534         struct file_extent_hole *prev = NULL;
535         struct file_extent_hole *next = NULL;
536
537         hole = malloc(sizeof(*hole));
538         if (!hole)
539                 return -ENOMEM;
540         hole->start = start;
541         hole->len = len;
542         /* Since compare will not return 0, no -EEXIST will happen */
543         rb_insert(holes, &hole->node, compare_hole);
544
545         /* simple merge with previous hole */
546         if (rb_prev(&hole->node))
547                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
548                                 node);
549         if (prev && prev->start + prev->len >= hole->start) {
550                 hole->len = hole->start + hole->len - prev->start;
551                 hole->start = prev->start;
552                 rb_erase(&prev->node, holes);
553                 free(prev);
554                 prev = NULL;
555         }
556
557         /* iterate merge with next holes */
558         while (1) {
559                 if (!rb_next(&hole->node))
560                         break;
561                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
562                                         node);
563                 if (hole->start + hole->len >= next->start) {
564                         if (hole->start + hole->len <= next->start + next->len)
565                                 hole->len = next->start + next->len -
566                                             hole->start;
567                         rb_erase(&next->node, holes);
568                         free(next);
569                         next = NULL;
570                 } else
571                         break;
572         }
573         return 0;
574 }
575
576 static int compare_hole_range(struct rb_node *node, void *data)
577 {
578         struct file_extent_hole *hole;
579         u64 start;
580
581         hole = (struct file_extent_hole *)data;
582         start = hole->start;
583
584         hole = rb_entry(node, struct file_extent_hole, node);
585         if (start < hole->start)
586                 return -1;
587         if (start >= hole->start && start < hole->start + hole->len)
588                 return 0;
589         return 1;
590 }
591
592 /*
593  * Delete a hole in the record
594  *
595  * This will do the hole split and is much restrict than add.
596  */
597 static int del_file_extent_hole(struct rb_root *holes,
598                                 u64 start, u64 len)
599 {
600         struct file_extent_hole *hole;
601         struct file_extent_hole tmp;
602         u64 prev_start = 0;
603         u64 prev_len = 0;
604         u64 next_start = 0;
605         u64 next_len = 0;
606         struct rb_node *node;
607         int have_prev = 0;
608         int have_next = 0;
609         int ret = 0;
610
611         tmp.start = start;
612         tmp.len = len;
613         node = rb_search(holes, &tmp, compare_hole_range, NULL);
614         if (!node)
615                 return -EEXIST;
616         hole = rb_entry(node, struct file_extent_hole, node);
617         if (start + len > hole->start + hole->len)
618                 return -EEXIST;
619
620         /*
621          * Now there will be no overlap, delete the hole and re-add the
622          * split(s) if they exists.
623          */
624         if (start > hole->start) {
625                 prev_start = hole->start;
626                 prev_len = start - hole->start;
627                 have_prev = 1;
628         }
629         if (hole->start + hole->len > start + len) {
630                 next_start = start + len;
631                 next_len = hole->start + hole->len - start - len;
632                 have_next = 1;
633         }
634         rb_erase(node, holes);
635         free(hole);
636         if (have_prev) {
637                 ret = add_file_extent_hole(holes, prev_start, prev_len);
638                 if (ret < 0)
639                         return ret;
640         }
641         if (have_next) {
642                 ret = add_file_extent_hole(holes, next_start, next_len);
643                 if (ret < 0)
644                         return ret;
645         }
646         return 0;
647 }
648
649 static int copy_file_extent_holes(struct rb_root *dst,
650                                   struct rb_root *src)
651 {
652         struct file_extent_hole *hole;
653         struct rb_node *node;
654         int ret = 0;
655
656         node = rb_first(src);
657         while (node) {
658                 hole = rb_entry(node, struct file_extent_hole, node);
659                 ret = add_file_extent_hole(dst, hole->start, hole->len);
660                 if (ret)
661                         break;
662                 node = rb_next(node);
663         }
664         return ret;
665 }
666
667 static void free_file_extent_holes(struct rb_root *holes)
668 {
669         struct rb_node *node;
670         struct file_extent_hole *hole;
671
672         node = rb_first(holes);
673         while (node) {
674                 hole = rb_entry(node, struct file_extent_hole, node);
675                 rb_erase(node, holes);
676                 free(hole);
677                 node = rb_first(holes);
678         }
679 }
680
/* Forward declaration of a static function; its body is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
682
683 static void record_root_in_trans(struct btrfs_trans_handle *trans,
684                                  struct btrfs_root *root)
685 {
686         if (root->last_trans != trans->transid) {
687                 root->track_dirty = 1;
688                 root->last_trans = trans->transid;
689                 root->commit_root = root->node;
690                 extent_buffer_get(root->node);
691         }
692 }
693
694 static u8 imode_to_type(u32 imode)
695 {
696 #define S_SHIFT 12
697         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
698                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
699                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
700                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
701                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
702                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
703                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
704                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
705         };
706
707         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
708 #undef S_SHIFT
709 }
710
711 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
712 {
713         struct device_record *rec1;
714         struct device_record *rec2;
715
716         rec1 = rb_entry(node1, struct device_record, node);
717         rec2 = rb_entry(node2, struct device_record, node);
718         if (rec1->devid > rec2->devid)
719                 return -1;
720         else if (rec1->devid < rec2->devid)
721                 return 1;
722         else
723                 return 0;
724 }
725
726 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
727 {
728         struct inode_record *rec;
729         struct inode_backref *backref;
730         struct inode_backref *orig;
731         struct inode_backref *tmp;
732         struct orphan_data_extent *src_orphan;
733         struct orphan_data_extent *dst_orphan;
734         size_t size;
735         int ret;
736
737         rec = malloc(sizeof(*rec));
738         if (!rec)
739                 return ERR_PTR(-ENOMEM);
740         memcpy(rec, orig_rec, sizeof(*rec));
741         rec->refs = 1;
742         INIT_LIST_HEAD(&rec->backrefs);
743         INIT_LIST_HEAD(&rec->orphan_extents);
744         rec->holes = RB_ROOT;
745
746         list_for_each_entry(orig, &orig_rec->backrefs, list) {
747                 size = sizeof(*orig) + orig->namelen + 1;
748                 backref = malloc(size);
749                 if (!backref) {
750                         ret = -ENOMEM;
751                         goto cleanup;
752                 }
753                 memcpy(backref, orig, size);
754                 list_add_tail(&backref->list, &rec->backrefs);
755         }
756         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
757                 dst_orphan = malloc(sizeof(*dst_orphan));
758                 if (!dst_orphan) {
759                         ret = -ENOMEM;
760                         goto cleanup;
761                 }
762                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
763                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
764         }
765         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
766         BUG_ON(ret < 0);
767
768         return rec;
769
770 cleanup:
771         if (!list_empty(&rec->backrefs))
772                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
773                         list_del(&orig->list);
774                         free(orig);
775                 }
776
777         if (!list_empty(&rec->orphan_extents))
778                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
779                         list_del(&orig->list);
780                         free(orig);
781                 }
782
783         free(rec);
784
785         return ERR_PTR(ret);
786 }
787
788 static void print_orphan_data_extents(struct list_head *orphan_extents,
789                                       u64 objectid)
790 {
791         struct orphan_data_extent *orphan;
792
793         if (list_empty(orphan_extents))
794                 return;
795         printf("The following data extent is lost in tree %llu:\n",
796                objectid);
797         list_for_each_entry(orphan, orphan_extents, list) {
798                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
799                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
800                        orphan->disk_len);
801         }
802 }
803
804 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
805 {
806         u64 root_objectid = root->root_key.objectid;
807         int errors = rec->errors;
808
809         if (!errors)
810                 return;
811         /* reloc root errors, we print its corresponding fs root objectid*/
812         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
813                 root_objectid = root->root_key.offset;
814                 fprintf(stderr, "reloc");
815         }
816         fprintf(stderr, "root %llu inode %llu errors %x",
817                 (unsigned long long) root_objectid,
818                 (unsigned long long) rec->ino, rec->errors);
819
820         if (errors & I_ERR_NO_INODE_ITEM)
821                 fprintf(stderr, ", no inode item");
822         if (errors & I_ERR_NO_ORPHAN_ITEM)
823                 fprintf(stderr, ", no orphan item");
824         if (errors & I_ERR_DUP_INODE_ITEM)
825                 fprintf(stderr, ", dup inode item");
826         if (errors & I_ERR_DUP_DIR_INDEX)
827                 fprintf(stderr, ", dup dir index");
828         if (errors & I_ERR_ODD_DIR_ITEM)
829                 fprintf(stderr, ", odd dir item");
830         if (errors & I_ERR_ODD_FILE_EXTENT)
831                 fprintf(stderr, ", odd file extent");
832         if (errors & I_ERR_BAD_FILE_EXTENT)
833                 fprintf(stderr, ", bad file extent");
834         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
835                 fprintf(stderr, ", file extent overlap");
836         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
837                 fprintf(stderr, ", file extent discount");
838         if (errors & I_ERR_DIR_ISIZE_WRONG)
839                 fprintf(stderr, ", dir isize wrong");
840         if (errors & I_ERR_FILE_NBYTES_WRONG)
841                 fprintf(stderr, ", nbytes wrong");
842         if (errors & I_ERR_ODD_CSUM_ITEM)
843                 fprintf(stderr, ", odd csum item");
844         if (errors & I_ERR_SOME_CSUM_MISSING)
845                 fprintf(stderr, ", some csum missing");
846         if (errors & I_ERR_LINK_COUNT_WRONG)
847                 fprintf(stderr, ", link count wrong");
848         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
849                 fprintf(stderr, ", orphan file extent");
850         fprintf(stderr, "\n");
851         /* Print the orphan extents if needed */
852         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
853                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
854
855         /* Print the holes if needed */
856         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
857                 struct file_extent_hole *hole;
858                 struct rb_node *node;
859                 int found = 0;
860
861                 node = rb_first(&rec->holes);
862                 fprintf(stderr, "Found file extent holes:\n");
863                 while (node) {
864                         found = 1;
865                         hole = rb_entry(node, struct file_extent_hole, node);
866                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
867                                 hole->start, hole->len);
868                         node = rb_next(node);
869                 }
870                 if (!found)
871                         fprintf(stderr, "\tstart: 0, len: %llu\n",
872                                 round_up(rec->isize, root->sectorsize));
873         }
874 }
875
876 static void print_ref_error(int errors)
877 {
878         if (errors & REF_ERR_NO_DIR_ITEM)
879                 fprintf(stderr, ", no dir item");
880         if (errors & REF_ERR_NO_DIR_INDEX)
881                 fprintf(stderr, ", no dir index");
882         if (errors & REF_ERR_NO_INODE_REF)
883                 fprintf(stderr, ", no inode ref");
884         if (errors & REF_ERR_DUP_DIR_ITEM)
885                 fprintf(stderr, ", dup dir item");
886         if (errors & REF_ERR_DUP_DIR_INDEX)
887                 fprintf(stderr, ", dup dir index");
888         if (errors & REF_ERR_DUP_INODE_REF)
889                 fprintf(stderr, ", dup inode ref");
890         if (errors & REF_ERR_INDEX_UNMATCH)
891                 fprintf(stderr, ", index mismatch");
892         if (errors & REF_ERR_FILETYPE_UNMATCH)
893                 fprintf(stderr, ", filetype mismatch");
894         if (errors & REF_ERR_NAME_TOO_LONG)
895                 fprintf(stderr, ", name too long");
896         if (errors & REF_ERR_NO_ROOT_REF)
897                 fprintf(stderr, ", no root ref");
898         if (errors & REF_ERR_NO_ROOT_BACKREF)
899                 fprintf(stderr, ", no root backref");
900         if (errors & REF_ERR_DUP_ROOT_REF)
901                 fprintf(stderr, ", dup root ref");
902         if (errors & REF_ERR_DUP_ROOT_BACKREF)
903                 fprintf(stderr, ", dup root backref");
904         fprintf(stderr, "\n");
905 }
906
907 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
908                                           u64 ino, int mod)
909 {
910         struct ptr_node *node;
911         struct cache_extent *cache;
912         struct inode_record *rec = NULL;
913         int ret;
914
915         cache = lookup_cache_extent(inode_cache, ino, 1);
916         if (cache) {
917                 node = container_of(cache, struct ptr_node, cache);
918                 rec = node->data;
919                 if (mod && rec->refs > 1) {
920                         node->data = clone_inode_rec(rec);
921                         if (IS_ERR(node->data))
922                                 return node->data;
923                         rec->refs--;
924                         rec = node->data;
925                 }
926         } else if (mod) {
927                 rec = calloc(1, sizeof(*rec));
928                 if (!rec)
929                         return ERR_PTR(-ENOMEM);
930                 rec->ino = ino;
931                 rec->extent_start = (u64)-1;
932                 rec->refs = 1;
933                 INIT_LIST_HEAD(&rec->backrefs);
934                 INIT_LIST_HEAD(&rec->orphan_extents);
935                 rec->holes = RB_ROOT;
936
937                 node = malloc(sizeof(*node));
938                 if (!node) {
939                         free(rec);
940                         return ERR_PTR(-ENOMEM);
941                 }
942                 node->cache.start = ino;
943                 node->cache.size = 1;
944                 node->data = rec;
945
946                 if (ino == BTRFS_FREE_INO_OBJECTID)
947                         rec->found_link = 1;
948
949                 ret = insert_cache_extent(inode_cache, &node->cache);
950                 if (ret)
951                         return ERR_PTR(-EEXIST);
952         }
953         return rec;
954 }
955
956 static void free_orphan_data_extents(struct list_head *orphan_extents)
957 {
958         struct orphan_data_extent *orphan;
959
960         while (!list_empty(orphan_extents)) {
961                 orphan = list_entry(orphan_extents->next,
962                                     struct orphan_data_extent, list);
963                 list_del(&orphan->list);
964                 free(orphan);
965         }
966 }
967
968 static void free_inode_rec(struct inode_record *rec)
969 {
970         struct inode_backref *backref;
971
972         if (--rec->refs > 0)
973                 return;
974
975         while (!list_empty(&rec->backrefs)) {
976                 backref = to_inode_backref(rec->backrefs.next);
977                 list_del(&backref->list);
978                 free(backref);
979         }
980         free_orphan_data_extents(&rec->orphan_extents);
981         free_file_extent_holes(&rec->holes);
982         free(rec);
983 }
984
985 static int can_free_inode_rec(struct inode_record *rec)
986 {
987         if (!rec->errors && rec->checked && rec->found_inode_item &&
988             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
989                 return 1;
990         return 0;
991 }
992
993 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
994                                  struct inode_record *rec)
995 {
996         struct cache_extent *cache;
997         struct inode_backref *tmp, *backref;
998         struct ptr_node *node;
999         unsigned char filetype;
1000
1001         if (!rec->found_inode_item)
1002                 return;
1003
1004         filetype = imode_to_type(rec->imode);
1005         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1006                 if (backref->found_dir_item && backref->found_dir_index) {
1007                         if (backref->filetype != filetype)
1008                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1009                         if (!backref->errors && backref->found_inode_ref &&
1010                             rec->nlink == rec->found_link) {
1011                                 list_del(&backref->list);
1012                                 free(backref);
1013                         }
1014                 }
1015         }
1016
1017         if (!rec->checked || rec->merging)
1018                 return;
1019
1020         if (S_ISDIR(rec->imode)) {
1021                 if (rec->found_size != rec->isize)
1022                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1023                 if (rec->found_file_extent)
1024                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1025         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1026                 if (rec->found_dir_item)
1027                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1028                 if (rec->found_size != rec->nbytes)
1029                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1030                 if (rec->nlink > 0 && !no_holes &&
1031                     (rec->extent_end < rec->isize ||
1032                      first_extent_gap(&rec->holes) < rec->isize))
1033                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1034         }
1035
1036         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1037                 if (rec->found_csum_item && rec->nodatasum)
1038                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1039                 if (rec->some_csum_missing && !rec->nodatasum)
1040                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1041         }
1042
1043         BUG_ON(rec->refs != 1);
1044         if (can_free_inode_rec(rec)) {
1045                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1046                 node = container_of(cache, struct ptr_node, cache);
1047                 BUG_ON(node->data != rec);
1048                 remove_cache_extent(inode_cache, &node->cache);
1049                 free(node);
1050                 free_inode_rec(rec);
1051         }
1052 }
1053
1054 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1055 {
1056         struct btrfs_path path;
1057         struct btrfs_key key;
1058         int ret;
1059
1060         key.objectid = BTRFS_ORPHAN_OBJECTID;
1061         key.type = BTRFS_ORPHAN_ITEM_KEY;
1062         key.offset = ino;
1063
1064         btrfs_init_path(&path);
1065         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1066         btrfs_release_path(&path);
1067         if (ret > 0)
1068                 ret = -ENOENT;
1069         return ret;
1070 }
1071
1072 static int process_inode_item(struct extent_buffer *eb,
1073                               int slot, struct btrfs_key *key,
1074                               struct shared_node *active_node)
1075 {
1076         struct inode_record *rec;
1077         struct btrfs_inode_item *item;
1078
1079         rec = active_node->current;
1080         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1081         if (rec->found_inode_item) {
1082                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1083                 return 1;
1084         }
1085         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1086         rec->nlink = btrfs_inode_nlink(eb, item);
1087         rec->isize = btrfs_inode_size(eb, item);
1088         rec->nbytes = btrfs_inode_nbytes(eb, item);
1089         rec->imode = btrfs_inode_mode(eb, item);
1090         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1091                 rec->nodatasum = 1;
1092         rec->found_inode_item = 1;
1093         if (rec->nlink == 0)
1094                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1095         maybe_free_inode_rec(&active_node->inode_cache, rec);
1096         return 0;
1097 }
1098
1099 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1100                                                 const char *name,
1101                                                 int namelen, u64 dir)
1102 {
1103         struct inode_backref *backref;
1104
1105         list_for_each_entry(backref, &rec->backrefs, list) {
1106                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1107                         break;
1108                 if (backref->dir != dir || backref->namelen != namelen)
1109                         continue;
1110                 if (memcmp(name, backref->name, namelen))
1111                         continue;
1112                 return backref;
1113         }
1114
1115         backref = malloc(sizeof(*backref) + namelen + 1);
1116         if (!backref)
1117                 return NULL;
1118         memset(backref, 0, sizeof(*backref));
1119         backref->dir = dir;
1120         backref->namelen = namelen;
1121         memcpy(backref->name, name, namelen);
1122         backref->name[namelen] = '\0';
1123         list_add_tail(&backref->list, &rec->backrefs);
1124         return backref;
1125 }
1126
1127 static int add_inode_backref(struct cache_tree *inode_cache,
1128                              u64 ino, u64 dir, u64 index,
1129                              const char *name, int namelen,
1130                              int filetype, int itemtype, int errors)
1131 {
1132         struct inode_record *rec;
1133         struct inode_backref *backref;
1134
1135         rec = get_inode_rec(inode_cache, ino, 1);
1136         BUG_ON(IS_ERR(rec));
1137         backref = get_inode_backref(rec, name, namelen, dir);
1138         BUG_ON(!backref);
1139         if (errors)
1140                 backref->errors |= errors;
1141         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1142                 if (backref->found_dir_index)
1143                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1144                 if (backref->found_inode_ref && backref->index != index)
1145                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1146                 if (backref->found_dir_item && backref->filetype != filetype)
1147                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1148
1149                 backref->index = index;
1150                 backref->filetype = filetype;
1151                 backref->found_dir_index = 1;
1152         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1153                 rec->found_link++;
1154                 if (backref->found_dir_item)
1155                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1156                 if (backref->found_dir_index && backref->filetype != filetype)
1157                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1158
1159                 backref->filetype = filetype;
1160                 backref->found_dir_item = 1;
1161         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1162                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1163                 if (backref->found_inode_ref)
1164                         backref->errors |= REF_ERR_DUP_INODE_REF;
1165                 if (backref->found_dir_index && backref->index != index)
1166                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1167                 else
1168                         backref->index = index;
1169
1170                 backref->ref_type = itemtype;
1171                 backref->found_inode_ref = 1;
1172         } else {
1173                 BUG_ON(1);
1174         }
1175
1176         maybe_free_inode_rec(inode_cache, rec);
1177         return 0;
1178 }
1179
1180 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1181                             struct cache_tree *dst_cache)
1182 {
1183         struct inode_backref *backref;
1184         u32 dir_count = 0;
1185         int ret = 0;
1186
1187         dst->merging = 1;
1188         list_for_each_entry(backref, &src->backrefs, list) {
1189                 if (backref->found_dir_index) {
1190                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1191                                         backref->index, backref->name,
1192                                         backref->namelen, backref->filetype,
1193                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1194                 }
1195                 if (backref->found_dir_item) {
1196                         dir_count++;
1197                         add_inode_backref(dst_cache, dst->ino,
1198                                         backref->dir, 0, backref->name,
1199                                         backref->namelen, backref->filetype,
1200                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1201                 }
1202                 if (backref->found_inode_ref) {
1203                         add_inode_backref(dst_cache, dst->ino,
1204                                         backref->dir, backref->index,
1205                                         backref->name, backref->namelen, 0,
1206                                         backref->ref_type, backref->errors);
1207                 }
1208         }
1209
1210         if (src->found_dir_item)
1211                 dst->found_dir_item = 1;
1212         if (src->found_file_extent)
1213                 dst->found_file_extent = 1;
1214         if (src->found_csum_item)
1215                 dst->found_csum_item = 1;
1216         if (src->some_csum_missing)
1217                 dst->some_csum_missing = 1;
1218         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1219                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1220                 if (ret < 0)
1221                         return ret;
1222         }
1223
1224         BUG_ON(src->found_link < dir_count);
1225         dst->found_link += src->found_link - dir_count;
1226         dst->found_size += src->found_size;
1227         if (src->extent_start != (u64)-1) {
1228                 if (dst->extent_start == (u64)-1) {
1229                         dst->extent_start = src->extent_start;
1230                         dst->extent_end = src->extent_end;
1231                 } else {
1232                         if (dst->extent_end > src->extent_start)
1233                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1234                         else if (dst->extent_end < src->extent_start) {
1235                                 ret = add_file_extent_hole(&dst->holes,
1236                                         dst->extent_end,
1237                                         src->extent_start - dst->extent_end);
1238                         }
1239                         if (dst->extent_end < src->extent_end)
1240                                 dst->extent_end = src->extent_end;
1241                 }
1242         }
1243
1244         dst->errors |= src->errors;
1245         if (src->found_inode_item) {
1246                 if (!dst->found_inode_item) {
1247                         dst->nlink = src->nlink;
1248                         dst->isize = src->isize;
1249                         dst->nbytes = src->nbytes;
1250                         dst->imode = src->imode;
1251                         dst->nodatasum = src->nodatasum;
1252                         dst->found_inode_item = 1;
1253                 } else {
1254                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1255                 }
1256         }
1257         dst->merging = 0;
1258
1259         return 0;
1260 }
1261
1262 static int splice_shared_node(struct shared_node *src_node,
1263                               struct shared_node *dst_node)
1264 {
1265         struct cache_extent *cache;
1266         struct ptr_node *node, *ins;
1267         struct cache_tree *src, *dst;
1268         struct inode_record *rec, *conflict;
1269         u64 current_ino = 0;
1270         int splice = 0;
1271         int ret;
1272
1273         if (--src_node->refs == 0)
1274                 splice = 1;
1275         if (src_node->current)
1276                 current_ino = src_node->current->ino;
1277
1278         src = &src_node->root_cache;
1279         dst = &dst_node->root_cache;
1280 again:
1281         cache = search_cache_extent(src, 0);
1282         while (cache) {
1283                 node = container_of(cache, struct ptr_node, cache);
1284                 rec = node->data;
1285                 cache = next_cache_extent(cache);
1286
1287                 if (splice) {
1288                         remove_cache_extent(src, &node->cache);
1289                         ins = node;
1290                 } else {
1291                         ins = malloc(sizeof(*ins));
1292                         BUG_ON(!ins);
1293                         ins->cache.start = node->cache.start;
1294                         ins->cache.size = node->cache.size;
1295                         ins->data = rec;
1296                         rec->refs++;
1297                 }
1298                 ret = insert_cache_extent(dst, &ins->cache);
1299                 if (ret == -EEXIST) {
1300                         conflict = get_inode_rec(dst, rec->ino, 1);
1301                         BUG_ON(IS_ERR(conflict));
1302                         merge_inode_recs(rec, conflict, dst);
1303                         if (rec->checked) {
1304                                 conflict->checked = 1;
1305                                 if (dst_node->current == conflict)
1306                                         dst_node->current = NULL;
1307                         }
1308                         maybe_free_inode_rec(dst, conflict);
1309                         free_inode_rec(rec);
1310                         free(ins);
1311                 } else {
1312                         BUG_ON(ret);
1313                 }
1314         }
1315
1316         if (src == &src_node->root_cache) {
1317                 src = &src_node->inode_cache;
1318                 dst = &dst_node->inode_cache;
1319                 goto again;
1320         }
1321
1322         if (current_ino > 0 && (!dst_node->current ||
1323             current_ino > dst_node->current->ino)) {
1324                 if (dst_node->current) {
1325                         dst_node->current->checked = 1;
1326                         maybe_free_inode_rec(dst, dst_node->current);
1327                 }
1328                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1329                 BUG_ON(IS_ERR(dst_node->current));
1330         }
1331         return 0;
1332 }
1333
1334 static void free_inode_ptr(struct cache_extent *cache)
1335 {
1336         struct ptr_node *node;
1337         struct inode_record *rec;
1338
1339         node = container_of(cache, struct ptr_node, cache);
1340         rec = node->data;
1341         free_inode_rec(rec);
1342         free(node);
1343 }
1344
1345 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1346
1347 static struct shared_node *find_shared_node(struct cache_tree *shared,
1348                                             u64 bytenr)
1349 {
1350         struct cache_extent *cache;
1351         struct shared_node *node;
1352
1353         cache = lookup_cache_extent(shared, bytenr, 1);
1354         if (cache) {
1355                 node = container_of(cache, struct shared_node, cache);
1356                 return node;
1357         }
1358         return NULL;
1359 }
1360
1361 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1362 {
1363         int ret;
1364         struct shared_node *node;
1365
1366         node = calloc(1, sizeof(*node));
1367         if (!node)
1368                 return -ENOMEM;
1369         node->cache.start = bytenr;
1370         node->cache.size = 1;
1371         cache_tree_init(&node->root_cache);
1372         cache_tree_init(&node->inode_cache);
1373         node->refs = refs;
1374
1375         ret = insert_cache_extent(shared, &node->cache);
1376
1377         return ret;
1378 }
1379
1380 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1381                              struct walk_control *wc, int level)
1382 {
1383         struct shared_node *node;
1384         struct shared_node *dest;
1385         int ret;
1386
1387         if (level == wc->active_node)
1388                 return 0;
1389
1390         BUG_ON(wc->active_node <= level);
1391         node = find_shared_node(&wc->shared, bytenr);
1392         if (!node) {
1393                 ret = add_shared_node(&wc->shared, bytenr, refs);
1394                 BUG_ON(ret);
1395                 node = find_shared_node(&wc->shared, bytenr);
1396                 wc->nodes[level] = node;
1397                 wc->active_node = level;
1398                 return 0;
1399         }
1400
1401         if (wc->root_level == wc->active_node &&
1402             btrfs_root_refs(&root->root_item) == 0) {
1403                 if (--node->refs == 0) {
1404                         free_inode_recs_tree(&node->root_cache);
1405                         free_inode_recs_tree(&node->inode_cache);
1406                         remove_cache_extent(&wc->shared, &node->cache);
1407                         free(node);
1408                 }
1409                 return 1;
1410         }
1411
1412         dest = wc->nodes[wc->active_node];
1413         splice_shared_node(node, dest);
1414         if (node->refs == 0) {
1415                 remove_cache_extent(&wc->shared, &node->cache);
1416                 free(node);
1417         }
1418         return 1;
1419 }
1420
1421 static int leave_shared_node(struct btrfs_root *root,
1422                              struct walk_control *wc, int level)
1423 {
1424         struct shared_node *node;
1425         struct shared_node *dest;
1426         int i;
1427
1428         if (level == wc->root_level)
1429                 return 0;
1430
1431         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1432                 if (wc->nodes[i])
1433                         break;
1434         }
1435         BUG_ON(i >= BTRFS_MAX_LEVEL);
1436
1437         node = wc->nodes[wc->active_node];
1438         wc->nodes[wc->active_node] = NULL;
1439         wc->active_node = i;
1440
1441         dest = wc->nodes[wc->active_node];
1442         if (wc->active_node < wc->root_level ||
1443             btrfs_root_refs(&root->root_item) > 0) {
1444                 BUG_ON(node->refs <= 1);
1445                 splice_shared_node(node, dest);
1446         } else {
1447                 BUG_ON(node->refs < 2);
1448                 node->refs--;
1449         }
1450         return 0;
1451 }
1452
1453 /*
1454  * Returns:
1455  * < 0 - on error
1456  * 1   - if the root with id child_root_id is a child of root parent_root_id
1457  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1458  *       has other root(s) as parent(s)
1459  * 2   - if the root child_root_id doesn't have any parent roots
1460  */
1461 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1462                          u64 child_root_id)
1463 {
1464         struct btrfs_path path;
1465         struct btrfs_key key;
1466         struct extent_buffer *leaf;
1467         int has_parent = 0;
1468         int ret;
1469
1470         btrfs_init_path(&path);
1471
1472         key.objectid = parent_root_id;
1473         key.type = BTRFS_ROOT_REF_KEY;
1474         key.offset = child_root_id;
1475         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1476                                 0, 0);
1477         if (ret < 0)
1478                 return ret;
1479         btrfs_release_path(&path);
1480         if (!ret)
1481                 return 1;
1482
1483         key.objectid = child_root_id;
1484         key.type = BTRFS_ROOT_BACKREF_KEY;
1485         key.offset = 0;
1486         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1487                                 0, 0);
1488         if (ret < 0)
1489                 goto out;
1490
1491         while (1) {
1492                 leaf = path.nodes[0];
1493                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1494                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1495                         if (ret)
1496                                 break;
1497                         leaf = path.nodes[0];
1498                 }
1499
1500                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1501                 if (key.objectid != child_root_id ||
1502                     key.type != BTRFS_ROOT_BACKREF_KEY)
1503                         break;
1504
1505                 has_parent = 1;
1506
1507                 if (key.offset == parent_root_id) {
1508                         btrfs_release_path(&path);
1509                         return 1;
1510                 }
1511
1512                 path.slots[0]++;
1513         }
1514 out:
1515         btrfs_release_path(&path);
1516         if (ret < 0)
1517                 return ret;
1518         return has_parent ? 0 : 2;
1519 }
1520
1521 static int process_dir_item(struct btrfs_root *root,
1522                             struct extent_buffer *eb,
1523                             int slot, struct btrfs_key *key,
1524                             struct shared_node *active_node)
1525 {
1526         u32 total;
1527         u32 cur = 0;
1528         u32 len;
1529         u32 name_len;
1530         u32 data_len;
1531         int error;
1532         int nritems = 0;
1533         int filetype;
1534         struct btrfs_dir_item *di;
1535         struct inode_record *rec;
1536         struct cache_tree *root_cache;
1537         struct cache_tree *inode_cache;
1538         struct btrfs_key location;
1539         char namebuf[BTRFS_NAME_LEN];
1540
1541         root_cache = &active_node->root_cache;
1542         inode_cache = &active_node->inode_cache;
1543         rec = active_node->current;
1544         rec->found_dir_item = 1;
1545
1546         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1547         total = btrfs_item_size_nr(eb, slot);
1548         while (cur < total) {
1549                 nritems++;
1550                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1551                 name_len = btrfs_dir_name_len(eb, di);
1552                 data_len = btrfs_dir_data_len(eb, di);
1553                 filetype = btrfs_dir_type(eb, di);
1554
1555                 rec->found_size += name_len;
1556                 if (name_len <= BTRFS_NAME_LEN) {
1557                         len = name_len;
1558                         error = 0;
1559                 } else {
1560                         len = BTRFS_NAME_LEN;
1561                         error = REF_ERR_NAME_TOO_LONG;
1562                 }
1563                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1564
1565                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1566                         add_inode_backref(inode_cache, location.objectid,
1567                                           key->objectid, key->offset, namebuf,
1568                                           len, filetype, key->type, error);
1569                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1570                         add_inode_backref(root_cache, location.objectid,
1571                                           key->objectid, key->offset,
1572                                           namebuf, len, filetype,
1573                                           key->type, error);
1574                 } else {
1575                         fprintf(stderr, "invalid location in dir item %u\n",
1576                                 location.type);
1577                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1578                                           key->objectid, key->offset, namebuf,
1579                                           len, filetype, key->type, error);
1580                 }
1581
1582                 len = sizeof(*di) + name_len + data_len;
1583                 di = (struct btrfs_dir_item *)((char *)di + len);
1584                 cur += len;
1585         }
1586         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1587                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1588
1589         return 0;
1590 }
1591
1592 static int process_inode_ref(struct extent_buffer *eb,
1593                              int slot, struct btrfs_key *key,
1594                              struct shared_node *active_node)
1595 {
1596         u32 total;
1597         u32 cur = 0;
1598         u32 len;
1599         u32 name_len;
1600         u64 index;
1601         int error;
1602         struct cache_tree *inode_cache;
1603         struct btrfs_inode_ref *ref;
1604         char namebuf[BTRFS_NAME_LEN];
1605
1606         inode_cache = &active_node->inode_cache;
1607
1608         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1609         total = btrfs_item_size_nr(eb, slot);
1610         while (cur < total) {
1611                 name_len = btrfs_inode_ref_name_len(eb, ref);
1612                 index = btrfs_inode_ref_index(eb, ref);
1613                 if (name_len <= BTRFS_NAME_LEN) {
1614                         len = name_len;
1615                         error = 0;
1616                 } else {
1617                         len = BTRFS_NAME_LEN;
1618                         error = REF_ERR_NAME_TOO_LONG;
1619                 }
1620                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1621                 add_inode_backref(inode_cache, key->objectid, key->offset,
1622                                   index, namebuf, len, 0, key->type, error);
1623
1624                 len = sizeof(*ref) + name_len;
1625                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1626                 cur += len;
1627         }
1628         return 0;
1629 }
1630
1631 static int process_inode_extref(struct extent_buffer *eb,
1632                                 int slot, struct btrfs_key *key,
1633                                 struct shared_node *active_node)
1634 {
1635         u32 total;
1636         u32 cur = 0;
1637         u32 len;
1638         u32 name_len;
1639         u64 index;
1640         u64 parent;
1641         int error;
1642         struct cache_tree *inode_cache;
1643         struct btrfs_inode_extref *extref;
1644         char namebuf[BTRFS_NAME_LEN];
1645
1646         inode_cache = &active_node->inode_cache;
1647
1648         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1649         total = btrfs_item_size_nr(eb, slot);
1650         while (cur < total) {
1651                 name_len = btrfs_inode_extref_name_len(eb, extref);
1652                 index = btrfs_inode_extref_index(eb, extref);
1653                 parent = btrfs_inode_extref_parent(eb, extref);
1654                 if (name_len <= BTRFS_NAME_LEN) {
1655                         len = name_len;
1656                         error = 0;
1657                 } else {
1658                         len = BTRFS_NAME_LEN;
1659                         error = REF_ERR_NAME_TOO_LONG;
1660                 }
1661                 read_extent_buffer(eb, namebuf,
1662                                    (unsigned long)(extref + 1), len);
1663                 add_inode_backref(inode_cache, key->objectid, parent,
1664                                   index, namebuf, len, 0, key->type, error);
1665
1666                 len = sizeof(*extref) + name_len;
1667                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1668                 cur += len;
1669         }
1670         return 0;
1671
1672 }
1673
1674 static int count_csum_range(struct btrfs_root *root, u64 start,
1675                             u64 len, u64 *found)
1676 {
1677         struct btrfs_key key;
1678         struct btrfs_path path;
1679         struct extent_buffer *leaf;
1680         int ret;
1681         size_t size;
1682         *found = 0;
1683         u64 csum_end;
1684         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1685
1686         btrfs_init_path(&path);
1687
1688         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1689         key.offset = start;
1690         key.type = BTRFS_EXTENT_CSUM_KEY;
1691
1692         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1693                                 &key, &path, 0, 0);
1694         if (ret < 0)
1695                 goto out;
1696         if (ret > 0 && path.slots[0] > 0) {
1697                 leaf = path.nodes[0];
1698                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1699                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1700                     key.type == BTRFS_EXTENT_CSUM_KEY)
1701                         path.slots[0]--;
1702         }
1703
1704         while (len > 0) {
1705                 leaf = path.nodes[0];
1706                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1707                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1708                         if (ret > 0)
1709                                 break;
1710                         else if (ret < 0)
1711                                 goto out;
1712                         leaf = path.nodes[0];
1713                 }
1714
1715                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1716                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1717                     key.type != BTRFS_EXTENT_CSUM_KEY)
1718                         break;
1719
1720                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1721                 if (key.offset >= start + len)
1722                         break;
1723
1724                 if (key.offset > start)
1725                         start = key.offset;
1726
1727                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1728                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1729                 if (csum_end > start) {
1730                         size = min(csum_end - start, len);
1731                         len -= size;
1732                         start += size;
1733                         *found += size;
1734                 }
1735
1736                 path.slots[0]++;
1737         }
1738 out:
1739         btrfs_release_path(&path);
1740         if (ret < 0)
1741                 return ret;
1742         return 0;
1743 }
1744
1745 static int process_file_extent(struct btrfs_root *root,
1746                                 struct extent_buffer *eb,
1747                                 int slot, struct btrfs_key *key,
1748                                 struct shared_node *active_node)
1749 {
1750         struct inode_record *rec;
1751         struct btrfs_file_extent_item *fi;
1752         u64 num_bytes = 0;
1753         u64 disk_bytenr = 0;
1754         u64 extent_offset = 0;
1755         u64 mask = root->sectorsize - 1;
1756         int extent_type;
1757         int ret;
1758
1759         rec = active_node->current;
1760         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1761         rec->found_file_extent = 1;
1762
1763         if (rec->extent_start == (u64)-1) {
1764                 rec->extent_start = key->offset;
1765                 rec->extent_end = key->offset;
1766         }
1767
1768         if (rec->extent_end > key->offset)
1769                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1770         else if (rec->extent_end < key->offset) {
1771                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1772                                            key->offset - rec->extent_end);
1773                 if (ret < 0)
1774                         return ret;
1775         }
1776
1777         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1778         extent_type = btrfs_file_extent_type(eb, fi);
1779
1780         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1781                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1782                 if (num_bytes == 0)
1783                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1784                 rec->found_size += num_bytes;
1785                 num_bytes = (num_bytes + mask) & ~mask;
1786         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1787                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1788                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1789                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1790                 extent_offset = btrfs_file_extent_offset(eb, fi);
1791                 if (num_bytes == 0 || (num_bytes & mask))
1792                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1793                 if (num_bytes + extent_offset >
1794                     btrfs_file_extent_ram_bytes(eb, fi))
1795                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1796                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1797                     (btrfs_file_extent_compression(eb, fi) ||
1798                      btrfs_file_extent_encryption(eb, fi) ||
1799                      btrfs_file_extent_other_encoding(eb, fi)))
1800                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1801                 if (disk_bytenr > 0)
1802                         rec->found_size += num_bytes;
1803         } else {
1804                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1805         }
1806         rec->extent_end = key->offset + num_bytes;
1807
1808         /*
1809          * The data reloc tree will copy full extents into its inode and then
1810          * copy the corresponding csums.  Because the extent it copied could be
1811          * a preallocated extent that hasn't been written to yet there may be no
1812          * csums to copy, ergo we won't have csums for our file extent.  This is
1813          * ok so just don't bother checking csums if the inode belongs to the
1814          * data reloc tree.
1815          */
1816         if (disk_bytenr > 0 &&
1817             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1818                 u64 found;
1819                 if (btrfs_file_extent_compression(eb, fi))
1820                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1821                 else
1822                         disk_bytenr += extent_offset;
1823
1824                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1825                 if (ret < 0)
1826                         return ret;
1827                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1828                         if (found > 0)
1829                                 rec->found_csum_item = 1;
1830                         if (found < num_bytes)
1831                                 rec->some_csum_missing = 1;
1832                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1833                         if (found > 0)
1834                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1835                 }
1836         }
1837         return 0;
1838 }
1839
1840 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1841                             struct walk_control *wc)
1842 {
1843         struct btrfs_key key;
1844         u32 nritems;
1845         int i;
1846         int ret = 0;
1847         struct cache_tree *inode_cache;
1848         struct shared_node *active_node;
1849
1850         if (wc->root_level == wc->active_node &&
1851             btrfs_root_refs(&root->root_item) == 0)
1852                 return 0;
1853
1854         active_node = wc->nodes[wc->active_node];
1855         inode_cache = &active_node->inode_cache;
1856         nritems = btrfs_header_nritems(eb);
1857         for (i = 0; i < nritems; i++) {
1858                 btrfs_item_key_to_cpu(eb, &key, i);
1859
1860                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1861                         continue;
1862                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1863                         continue;
1864
1865                 if (active_node->current == NULL ||
1866                     active_node->current->ino < key.objectid) {
1867                         if (active_node->current) {
1868                                 active_node->current->checked = 1;
1869                                 maybe_free_inode_rec(inode_cache,
1870                                                      active_node->current);
1871                         }
1872                         active_node->current = get_inode_rec(inode_cache,
1873                                                              key.objectid, 1);
1874                         BUG_ON(IS_ERR(active_node->current));
1875                 }
1876                 switch (key.type) {
1877                 case BTRFS_DIR_ITEM_KEY:
1878                 case BTRFS_DIR_INDEX_KEY:
1879                         ret = process_dir_item(root, eb, i, &key, active_node);
1880                         break;
1881                 case BTRFS_INODE_REF_KEY:
1882                         ret = process_inode_ref(eb, i, &key, active_node);
1883                         break;
1884                 case BTRFS_INODE_EXTREF_KEY:
1885                         ret = process_inode_extref(eb, i, &key, active_node);
1886                         break;
1887                 case BTRFS_INODE_ITEM_KEY:
1888                         ret = process_inode_item(eb, i, &key, active_node);
1889                         break;
1890                 case BTRFS_EXTENT_DATA_KEY:
1891                         ret = process_file_extent(root, eb, i, &key,
1892                                                   active_node);
1893                         break;
1894                 default:
1895                         break;
1896                 };
1897         }
1898         return ret;
1899 }
1900
1901 static void reada_walk_down(struct btrfs_root *root,
1902                             struct extent_buffer *node, int slot)
1903 {
1904         u64 bytenr;
1905         u64 ptr_gen;
1906         u32 nritems;
1907         u32 blocksize;
1908         int i;
1909         int level;
1910
1911         level = btrfs_header_level(node);
1912         if (level != 1)
1913                 return;
1914
1915         nritems = btrfs_header_nritems(node);
1916         blocksize = root->nodesize;
1917         for (i = slot; i < nritems; i++) {
1918                 bytenr = btrfs_node_blockptr(node, i);
1919                 ptr_gen = btrfs_node_ptr_generation(node, i);
1920                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1921         }
1922 }
1923
1924 /*
1925  * Check the child node/leaf by the following condition:
1926  * 1. the first item key of the node/leaf should be the same with the one
1927  *    in parent.
1928  * 2. block in parent node should match the child node/leaf.
1929  * 3. generation of parent node and child's header should be consistent.
1930  *
1931  * Or the child node/leaf pointed by the key in parent is not valid.
1932  *
1933  * We hope to check leaf owner too, but since subvol may share leaves,
1934  * which makes leaf owner check not so strong, key check should be
1935  * sufficient enough for that case.
1936  */
1937 static int check_child_node(struct btrfs_root *root,
1938                             struct extent_buffer *parent, int slot,
1939                             struct extent_buffer *child)
1940 {
1941         struct btrfs_key parent_key;
1942         struct btrfs_key child_key;
1943         int ret = 0;
1944
1945         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1946         if (btrfs_header_level(child) == 0)
1947                 btrfs_item_key_to_cpu(child, &child_key, 0);
1948         else
1949                 btrfs_node_key_to_cpu(child, &child_key, 0);
1950
1951         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1952                 ret = -EINVAL;
1953                 fprintf(stderr,
1954                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1955                         parent_key.objectid, parent_key.type, parent_key.offset,
1956                         child_key.objectid, child_key.type, child_key.offset);
1957         }
1958         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1959                 ret = -EINVAL;
1960                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1961                         btrfs_node_blockptr(parent, slot),
1962                         btrfs_header_bytenr(child));
1963         }
1964         if (btrfs_node_ptr_generation(parent, slot) !=
1965             btrfs_header_generation(child)) {
1966                 ret = -EINVAL;
1967                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1968                         btrfs_header_generation(child),
1969                         btrfs_node_ptr_generation(parent, slot));
1970         }
1971         return ret;
1972 }
1973
1974 struct node_refs {
1975         u64 bytenr[BTRFS_MAX_LEVEL];
1976         u64 refs[BTRFS_MAX_LEVEL];
1977 };
1978
1979 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1980                           struct walk_control *wc, int *level,
1981                           struct node_refs *nrefs)
1982 {
1983         enum btrfs_tree_block_status status;
1984         u64 bytenr;
1985         u64 ptr_gen;
1986         struct extent_buffer *next;
1987         struct extent_buffer *cur;
1988         u32 blocksize;
1989         int ret, err = 0;
1990         u64 refs;
1991
1992         WARN_ON(*level < 0);
1993         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1994
1995         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1996                 refs = nrefs->refs[*level];
1997                 ret = 0;
1998         } else {
1999                 ret = btrfs_lookup_extent_info(NULL, root,
2000                                        path->nodes[*level]->start,
2001                                        *level, 1, &refs, NULL);
2002                 if (ret < 0) {
2003                         err = ret;
2004                         goto out;
2005                 }
2006                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2007                 nrefs->refs[*level] = refs;
2008         }
2009
2010         if (refs > 1) {
2011                 ret = enter_shared_node(root, path->nodes[*level]->start,
2012                                         refs, wc, *level);
2013                 if (ret > 0) {
2014                         err = ret;
2015                         goto out;
2016                 }
2017         }
2018
2019         while (*level >= 0) {
2020                 WARN_ON(*level < 0);
2021                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2022                 cur = path->nodes[*level];
2023
2024                 if (btrfs_header_level(cur) != *level)
2025                         WARN_ON(1);
2026
2027                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2028                         break;
2029                 if (*level == 0) {
2030                         ret = process_one_leaf(root, cur, wc);
2031                         if (ret < 0)
2032                                 err = ret;
2033                         break;
2034                 }
2035                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2036                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2037                 blocksize = root->nodesize;
2038
2039                 if (bytenr == nrefs->bytenr[*level - 1]) {
2040                         refs = nrefs->refs[*level - 1];
2041                 } else {
2042                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2043                                         *level - 1, 1, &refs, NULL);
2044                         if (ret < 0) {
2045                                 refs = 0;
2046                         } else {
2047                                 nrefs->bytenr[*level - 1] = bytenr;
2048                                 nrefs->refs[*level - 1] = refs;
2049                         }
2050                 }
2051
2052                 if (refs > 1) {
2053                         ret = enter_shared_node(root, bytenr, refs,
2054                                                 wc, *level - 1);
2055                         if (ret > 0) {
2056                                 path->slots[*level]++;
2057                                 continue;
2058                         }
2059                 }
2060
2061                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2062                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2063                         free_extent_buffer(next);
2064                         reada_walk_down(root, cur, path->slots[*level]);
2065                         next = read_tree_block(root, bytenr, blocksize,
2066                                                ptr_gen);
2067                         if (!extent_buffer_uptodate(next)) {
2068                                 struct btrfs_key node_key;
2069
2070                                 btrfs_node_key_to_cpu(path->nodes[*level],
2071                                                       &node_key,
2072                                                       path->slots[*level]);
2073                                 btrfs_add_corrupt_extent_record(root->fs_info,
2074                                                 &node_key,
2075                                                 path->nodes[*level]->start,
2076                                                 root->nodesize, *level);
2077                                 err = -EIO;
2078                                 goto out;
2079                         }
2080                 }
2081
2082                 ret = check_child_node(root, cur, path->slots[*level], next);
2083                 if (ret) {
2084                         err = ret;
2085                         goto out;
2086                 }
2087
2088                 if (btrfs_is_leaf(next))
2089                         status = btrfs_check_leaf(root, NULL, next);
2090                 else
2091                         status = btrfs_check_node(root, NULL, next);
2092                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2093                         free_extent_buffer(next);
2094                         err = -EIO;
2095                         goto out;
2096                 }
2097
2098                 *level = *level - 1;
2099                 free_extent_buffer(path->nodes[*level]);
2100                 path->nodes[*level] = next;
2101                 path->slots[*level] = 0;
2102         }
2103 out:
2104         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2105         return err;
2106 }
2107
2108 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2109                         struct walk_control *wc, int *level)
2110 {
2111         int i;
2112         struct extent_buffer *leaf;
2113
2114         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2115                 leaf = path->nodes[i];
2116                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2117                         path->slots[i]++;
2118                         *level = i;
2119                         return 0;
2120                 } else {
2121                         free_extent_buffer(path->nodes[*level]);
2122                         path->nodes[*level] = NULL;
2123                         BUG_ON(*level > wc->active_node);
2124                         if (*level == wc->active_node)
2125                                 leave_shared_node(root, wc, *level);
2126                         *level = i + 1;
2127                 }
2128         }
2129         return 1;
2130 }
2131
2132 static int check_root_dir(struct inode_record *rec)
2133 {
2134         struct inode_backref *backref;
2135         int ret = -1;
2136
2137         if (!rec->found_inode_item || rec->errors)
2138                 goto out;
2139         if (rec->nlink != 1 || rec->found_link != 0)
2140                 goto out;
2141         if (list_empty(&rec->backrefs))
2142                 goto out;
2143         backref = to_inode_backref(rec->backrefs.next);
2144         if (!backref->found_inode_ref)
2145                 goto out;
2146         if (backref->index != 0 || backref->namelen != 2 ||
2147             memcmp(backref->name, "..", 2))
2148                 goto out;
2149         if (backref->found_dir_index || backref->found_dir_item)
2150                 goto out;
2151         ret = 0;
2152 out:
2153         return ret;
2154 }
2155
2156 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2157                               struct btrfs_root *root, struct btrfs_path *path,
2158                               struct inode_record *rec)
2159 {
2160         struct btrfs_inode_item *ei;
2161         struct btrfs_key key;
2162         int ret;
2163
2164         key.objectid = rec->ino;
2165         key.type = BTRFS_INODE_ITEM_KEY;
2166         key.offset = (u64)-1;
2167
2168         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2169         if (ret < 0)
2170                 goto out;
2171         if (ret) {
2172                 if (!path->slots[0]) {
2173                         ret = -ENOENT;
2174                         goto out;
2175                 }
2176                 path->slots[0]--;
2177                 ret = 0;
2178         }
2179         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2180         if (key.objectid != rec->ino) {
2181                 ret = -ENOENT;
2182                 goto out;
2183         }
2184
2185         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2186                             struct btrfs_inode_item);
2187         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2188         btrfs_mark_buffer_dirty(path->nodes[0]);
2189         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2190         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2191                root->root_key.objectid);
2192 out:
2193         btrfs_release_path(path);
2194         return ret;
2195 }
2196
2197 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2198                                     struct btrfs_root *root,
2199                                     struct btrfs_path *path,
2200                                     struct inode_record *rec)
2201 {
2202         int ret;
2203
2204         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2205         btrfs_release_path(path);
2206         if (!ret)
2207                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2208         return ret;
2209 }
2210
2211 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2212                                struct btrfs_root *root,
2213                                struct btrfs_path *path,
2214                                struct inode_record *rec)
2215 {
2216         struct btrfs_inode_item *ei;
2217         struct btrfs_key key;
2218         int ret = 0;
2219
2220         key.objectid = rec->ino;
2221         key.type = BTRFS_INODE_ITEM_KEY;
2222         key.offset = 0;
2223
2224         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2225         if (ret) {
2226                 if (ret > 0)
2227                         ret = -ENOENT;
2228                 goto out;
2229         }
2230
2231         /* Since ret == 0, no need to check anything */
2232         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2233                             struct btrfs_inode_item);
2234         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2235         btrfs_mark_buffer_dirty(path->nodes[0]);
2236         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2237         printf("reset nbytes for ino %llu root %llu\n",
2238                rec->ino, root->root_key.objectid);
2239 out:
2240         btrfs_release_path(path);
2241         return ret;
2242 }
2243
2244 static int add_missing_dir_index(struct btrfs_root *root,
2245                                  struct cache_tree *inode_cache,
2246                                  struct inode_record *rec,
2247                                  struct inode_backref *backref)
2248 {
2249         struct btrfs_path *path;
2250         struct btrfs_trans_handle *trans;
2251         struct btrfs_dir_item *dir_item;
2252         struct extent_buffer *leaf;
2253         struct btrfs_key key;
2254         struct btrfs_disk_key disk_key;
2255         struct inode_record *dir_rec;
2256         unsigned long name_ptr;
2257         u32 data_size = sizeof(*dir_item) + backref->namelen;
2258         int ret;
2259
2260         path = btrfs_alloc_path();
2261         if (!path)
2262                 return -ENOMEM;
2263
2264         trans = btrfs_start_transaction(root, 1);
2265         if (IS_ERR(trans)) {
2266                 btrfs_free_path(path);
2267                 return PTR_ERR(trans);
2268         }
2269
2270         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2271                 (unsigned long long)rec->ino);
2272         key.objectid = backref->dir;
2273         key.type = BTRFS_DIR_INDEX_KEY;
2274         key.offset = backref->index;
2275
2276         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2277         BUG_ON(ret);
2278
2279         leaf = path->nodes[0];
2280         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2281
2282         disk_key.objectid = cpu_to_le64(rec->ino);
2283         disk_key.type = BTRFS_INODE_ITEM_KEY;
2284         disk_key.offset = 0;
2285
2286         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2287         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2288         btrfs_set_dir_data_len(leaf, dir_item, 0);
2289         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2290         name_ptr = (unsigned long)(dir_item + 1);
2291         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2292         btrfs_mark_buffer_dirty(leaf);
2293         btrfs_free_path(path);
2294         btrfs_commit_transaction(trans, root);
2295
2296         backref->found_dir_index = 1;
2297         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2298         BUG_ON(IS_ERR(dir_rec));
2299         if (!dir_rec)
2300                 return 0;
2301         dir_rec->found_size += backref->namelen;
2302         if (dir_rec->found_size == dir_rec->isize &&
2303             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2304                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2305         if (dir_rec->found_size != dir_rec->isize)
2306                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2307
2308         return 0;
2309 }
2310
2311 static int delete_dir_index(struct btrfs_root *root,
2312                             struct cache_tree *inode_cache,
2313                             struct inode_record *rec,
2314                             struct inode_backref *backref)
2315 {
2316         struct btrfs_trans_handle *trans;
2317         struct btrfs_dir_item *di;
2318         struct btrfs_path *path;
2319         int ret = 0;
2320
2321         path = btrfs_alloc_path();
2322         if (!path)
2323                 return -ENOMEM;
2324
2325         trans = btrfs_start_transaction(root, 1);
2326         if (IS_ERR(trans)) {
2327                 btrfs_free_path(path);
2328                 return PTR_ERR(trans);
2329         }
2330
2331
2332         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2333                 (unsigned long long)backref->dir,
2334                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2335                 (unsigned long long)root->objectid);
2336
2337         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2338                                     backref->name, backref->namelen,
2339                                     backref->index, -1);
2340         if (IS_ERR(di)) {
2341                 ret = PTR_ERR(di);
2342                 btrfs_free_path(path);
2343                 btrfs_commit_transaction(trans, root);
2344                 if (ret == -ENOENT)
2345                         return 0;
2346                 return ret;
2347         }
2348
2349         if (!di)
2350                 ret = btrfs_del_item(trans, root, path);
2351         else
2352                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2353         BUG_ON(ret);
2354         btrfs_free_path(path);
2355         btrfs_commit_transaction(trans, root);
2356         return ret;
2357 }
2358
2359 static int create_inode_item(struct btrfs_root *root,
2360                              struct inode_record *rec,
2361                              struct inode_backref *backref, int root_dir)
2362 {
2363         struct btrfs_trans_handle *trans;
2364         struct btrfs_inode_item inode_item;
2365         time_t now = time(NULL);
2366         int ret;
2367
2368         trans = btrfs_start_transaction(root, 1);
2369         if (IS_ERR(trans)) {
2370                 ret = PTR_ERR(trans);
2371                 return ret;
2372         }
2373
2374         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2375                 "be incomplete, please check permissions and content after "
2376                 "the fsck completes.\n", (unsigned long long)root->objectid,
2377                 (unsigned long long)rec->ino);
2378
2379         memset(&inode_item, 0, sizeof(inode_item));
2380         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2381         if (root_dir)
2382                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2383         else
2384                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2385         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2386         if (rec->found_dir_item) {
2387                 if (rec->found_file_extent)
2388                         fprintf(stderr, "root %llu inode %llu has both a dir "
2389                                 "item and extents, unsure if it is a dir or a "
2390                                 "regular file so setting it as a directory\n",
2391                                 (unsigned long long)root->objectid,
2392                                 (unsigned long long)rec->ino);
2393                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2394                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2395         } else if (!rec->found_dir_item) {
2396                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2397                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2398         }
2399         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2400         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2401         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2402         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2403         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2404         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2405         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2406         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2407
2408         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2409         BUG_ON(ret);
2410         btrfs_commit_transaction(trans, root);
2411         return 0;
2412 }
2413
2414 static int repair_inode_backrefs(struct btrfs_root *root,
2415                                  struct inode_record *rec,
2416                                  struct cache_tree *inode_cache,
2417                                  int delete)
2418 {
2419         struct inode_backref *tmp, *backref;
2420         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2421         int ret = 0;
2422         int repaired = 0;
2423
2424         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2425                 if (!delete && rec->ino == root_dirid) {
2426                         if (!rec->found_inode_item) {
2427                                 ret = create_inode_item(root, rec, backref, 1);
2428                                 if (ret)
2429                                         break;
2430                                 repaired++;
2431                         }
2432                 }
2433
2434                 /* Index 0 for root dir's are special, don't mess with it */
2435                 if (rec->ino == root_dirid && backref->index == 0)
2436                         continue;
2437
2438                 if (delete &&
2439                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2440                      (backref->found_dir_index && backref->found_inode_ref &&
2441                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2442                         ret = delete_dir_index(root, inode_cache, rec, backref);
2443                         if (ret)
2444                                 break;
2445                         repaired++;
2446                         list_del(&backref->list);
2447                         free(backref);
2448                 }
2449
2450                 if (!delete && !backref->found_dir_index &&
2451                     backref->found_dir_item && backref->found_inode_ref) {
2452                         ret = add_missing_dir_index(root, inode_cache, rec,
2453                                                     backref);
2454                         if (ret)
2455                                 break;
2456                         repaired++;
2457                         if (backref->found_dir_item &&
2458                             backref->found_dir_index &&
2459                             backref->found_dir_index) {
2460                                 if (!backref->errors &&
2461                                     backref->found_inode_ref) {
2462                                         list_del(&backref->list);
2463                                         free(backref);
2464                                 }
2465                         }
2466                 }
2467
2468                 if (!delete && (!backref->found_dir_index &&
2469                                 !backref->found_dir_item &&
2470                                 backref->found_inode_ref)) {
2471                         struct btrfs_trans_handle *trans;
2472                         struct btrfs_key location;
2473
2474                         ret = check_dir_conflict(root, backref->name,
2475                                                  backref->namelen,
2476                                                  backref->dir,
2477                                                  backref->index);
2478                         if (ret) {
2479                                 /*
2480                                  * let nlink fixing routine to handle it,
2481                                  * which can do it better.
2482                                  */
2483                                 ret = 0;
2484                                 break;
2485                         }
2486                         location.objectid = rec->ino;
2487                         location.type = BTRFS_INODE_ITEM_KEY;
2488                         location.offset = 0;
2489
2490                         trans = btrfs_start_transaction(root, 1);
2491                         if (IS_ERR(trans)) {
2492                                 ret = PTR_ERR(trans);
2493                                 break;
2494                         }
2495                         fprintf(stderr, "adding missing dir index/item pair "
2496                                 "for inode %llu\n",
2497                                 (unsigned long long)rec->ino);
2498                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2499                                                     backref->namelen,
2500                                                     backref->dir, &location,
2501                                                     imode_to_type(rec->imode),
2502                                                     backref->index);
2503                         BUG_ON(ret);
2504                         btrfs_commit_transaction(trans, root);
2505                         repaired++;
2506                 }
2507
2508                 if (!delete && (backref->found_inode_ref &&
2509                                 backref->found_dir_index &&
2510                                 backref->found_dir_item &&
2511                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2512                                 !rec->found_inode_item)) {
2513                         ret = create_inode_item(root, rec, backref, 0);
2514                         if (ret)
2515                                 break;
2516                         repaired++;
2517                 }
2518
2519         }
2520         return ret ? ret : repaired;
2521 }
2522
2523 /*
2524  * To determine the file type for nlink/inode_item repair
2525  *
2526  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2527  * Return -ENOENT if file type is not found.
2528  */
2529 static int find_file_type(struct inode_record *rec, u8 *type)
2530 {
2531         struct inode_backref *backref;
2532
2533         /* For inode item recovered case */
2534         if (rec->found_inode_item) {
2535                 *type = imode_to_type(rec->imode);
2536                 return 0;
2537         }
2538
2539         list_for_each_entry(backref, &rec->backrefs, list) {
2540                 if (backref->found_dir_index || backref->found_dir_item) {
2541                         *type = backref->filetype;
2542                         return 0;
2543                 }
2544         }
2545         return -ENOENT;
2546 }
2547
2548 /*
2549  * To determine the file name for nlink repair
2550  *
2551  * Return 0 if file name is found, set name and namelen.
2552  * Return -ENOENT if file name is not found.
2553  */
2554 static int find_file_name(struct inode_record *rec,
2555                           char *name, int *namelen)
2556 {
2557         struct inode_backref *backref;
2558
2559         list_for_each_entry(backref, &rec->backrefs, list) {
2560                 if (backref->found_dir_index || backref->found_dir_item ||
2561                     backref->found_inode_ref) {
2562                         memcpy(name, backref->name, backref->namelen);
2563                         *namelen = backref->namelen;
2564                         return 0;
2565                 }
2566         }
2567         return -ENOENT;
2568 }
2569
2570 /* Reset the nlink of the inode to the correct one */
2571 static int reset_nlink(struct btrfs_trans_handle *trans,
2572                        struct btrfs_root *root,
2573                        struct btrfs_path *path,
2574                        struct inode_record *rec)
2575 {
2576         struct inode_backref *backref;
2577         struct inode_backref *tmp;
2578         struct btrfs_key key;
2579         struct btrfs_inode_item *inode_item;
2580         int ret = 0;
2581
2582         /* We don't believe this either, reset it and iterate backref */
2583         rec->found_link = 0;
2584
2585         /* Remove all backref including the valid ones */
2586         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2587                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2588                                    backref->index, backref->name,
2589                                    backref->namelen, 0);
2590                 if (ret < 0)
2591                         goto out;
2592
2593                 /* remove invalid backref, so it won't be added back */
2594                 if (!(backref->found_dir_index &&
2595                       backref->found_dir_item &&
2596                       backref->found_inode_ref)) {
2597                         list_del(&backref->list);
2598                         free(backref);
2599                 } else {
2600                         rec->found_link++;
2601                 }
2602         }
2603
2604         /* Set nlink to 0 */
2605         key.objectid = rec->ino;
2606         key.type = BTRFS_INODE_ITEM_KEY;
2607         key.offset = 0;
2608         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2609         if (ret < 0)
2610                 goto out;
2611         if (ret > 0) {
2612                 ret = -ENOENT;
2613                 goto out;
2614         }
2615         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2616                                     struct btrfs_inode_item);
2617         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2618         btrfs_mark_buffer_dirty(path->nodes[0]);
2619         btrfs_release_path(path);
2620
2621         /*
2622          * Add back valid inode_ref/dir_item/dir_index,
2623          * add_link() will handle the nlink inc, so new nlink must be correct
2624          */
2625         list_for_each_entry(backref, &rec->backrefs, list) {
2626                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2627                                      backref->name, backref->namelen,
2628                                      backref->filetype, &backref->index, 1);
2629                 if (ret < 0)
2630                         goto out;
2631         }
2632 out:
2633         btrfs_release_path(path);
2634         return ret;
2635 }
2636
2637 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2638                                struct btrfs_root *root,
2639                                struct btrfs_path *path,
2640                                struct inode_record *rec)
2641 {
2642         char *dir_name = "lost+found";
2643         char namebuf[BTRFS_NAME_LEN] = {0};
2644         u64 lost_found_ino;
2645         u32 mode = 0700;
2646         u8 type = 0;
2647         int namelen = 0;
2648         int name_recovered = 0;
2649         int type_recovered = 0;
2650         int ret = 0;
2651
2652         /*
2653          * Get file name and type first before these invalid inode ref
2654          * are deleted by remove_all_invalid_backref()
2655          */
2656         name_recovered = !find_file_name(rec, namebuf, &namelen);
2657         type_recovered = !find_file_type(rec, &type);
2658
2659         if (!name_recovered) {
2660                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2661                        rec->ino, rec->ino);
2662                 namelen = count_digits(rec->ino);
2663                 sprintf(namebuf, "%llu", rec->ino);
2664                 name_recovered = 1;
2665         }
2666         if (!type_recovered) {
2667                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2668                        rec->ino);
2669                 type = BTRFS_FT_REG_FILE;
2670                 type_recovered = 1;
2671         }
2672
2673         ret = reset_nlink(trans, root, path, rec);
2674         if (ret < 0) {
2675                 fprintf(stderr,
2676                         "Failed to reset nlink for inode %llu: %s\n",
2677                         rec->ino, strerror(-ret));
2678                 goto out;
2679         }
2680
2681         if (rec->found_link == 0) {
2682                 lost_found_ino = root->highest_inode;
2683                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2684                         ret = -EOVERFLOW;
2685                         goto out;
2686                 }
2687                 lost_found_ino++;
2688                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2689                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2690                                   mode);
2691                 if (ret < 0) {
2692                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2693                                 dir_name, strerror(-ret));
2694                         goto out;
2695                 }
2696                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2697                                      namebuf, namelen, type, NULL, 1);
2698                 /*
2699                  * Add ".INO" suffix several times to handle case where
2700                  * "FILENAME.INO" is already taken by another file.
2701                  */
2702                 while (ret == -EEXIST) {
2703                         /*
2704                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2705                          */
2706                         if (namelen + count_digits(rec->ino) + 1 >
2707                             BTRFS_NAME_LEN) {
2708                                 ret = -EFBIG;
2709                                 goto out;
2710                         }
2711                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2712                                  ".%llu", rec->ino);
2713                         namelen += count_digits(rec->ino) + 1;
2714                         ret = btrfs_add_link(trans, root, rec->ino,
2715                                              lost_found_ino, namebuf,
2716                                              namelen, type, NULL, 1);
2717                 }
2718                 if (ret < 0) {
2719                         fprintf(stderr,
2720                                 "Failed to link the inode %llu to %s dir: %s\n",
2721                                 rec->ino, dir_name, strerror(-ret));
2722                         goto out;
2723                 }
2724                 /*
2725                  * Just increase the found_link, don't actually add the
2726                  * backref. This will make things easier and this inode
2727                  * record will be freed after the repair is done.
2728                  * So fsck will not report problem about this inode.
2729                  */
2730                 rec->found_link++;
2731                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2732                        namelen, namebuf, dir_name);
2733         }
2734         printf("Fixed the nlink of inode %llu\n", rec->ino);
2735 out:
2736         /*
2737          * Clear the flag anyway, or we will loop forever for the same inode
2738          * as it will not be removed from the bad inode list and the dead loop
2739          * happens.
2740          */
2741         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2742         btrfs_release_path(path);
2743         return ret;
2744 }
2745
2746 /*
2747  * Check if there is any normal(reg or prealloc) file extent for given
2748  * ino.
2749  * This is used to determine the file type when neither its dir_index/item or
2750  * inode_item exists.
2751  *
2752  * This will *NOT* report error, if any error happens, just consider it does
2753  * not have any normal file extent.
2754  */
2755 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2756 {
2757         struct btrfs_path *path;
2758         struct btrfs_key key;
2759         struct btrfs_key found_key;
2760         struct btrfs_file_extent_item *fi;
2761         u8 type;
2762         int ret = 0;
2763
2764         path = btrfs_alloc_path();
2765         if (!path)
2766                 goto out;
2767         key.objectid = ino;
2768         key.type = BTRFS_EXTENT_DATA_KEY;
2769         key.offset = 0;
2770
2771         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2772         if (ret < 0) {
2773                 ret = 0;
2774                 goto out;
2775         }
2776         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2777                 ret = btrfs_next_leaf(root, path);
2778                 if (ret) {
2779                         ret = 0;
2780                         goto out;
2781                 }
2782         }
2783         while (1) {
2784                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2785                                       path->slots[0]);
2786                 if (found_key.objectid != ino ||
2787                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2788                         break;
2789                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2790                                     struct btrfs_file_extent_item);
2791                 type = btrfs_file_extent_type(path->nodes[0], fi);
2792                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2793                         ret = 1;
2794                         goto out;
2795                 }
2796         }
2797 out:
2798         btrfs_free_path(path);
2799         return ret;
2800 }
2801
2802 static u32 btrfs_type_to_imode(u8 type)
2803 {
2804         static u32 imode_by_btrfs_type[] = {
2805                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2806                 [BTRFS_FT_DIR]          = S_IFDIR,
2807                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2808                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2809                 [BTRFS_FT_FIFO]         = S_IFIFO,
2810                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2811                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2812         };
2813
2814         return imode_by_btrfs_type[(type)];
2815 }
2816
2817 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2818                                 struct btrfs_root *root,
2819                                 struct btrfs_path *path,
2820                                 struct inode_record *rec)
2821 {
2822         u8 filetype;
2823         u32 mode = 0700;
2824         int type_recovered = 0;
2825         int ret = 0;
2826
2827         printf("Trying to rebuild inode:%llu\n", rec->ino);
2828
2829         type_recovered = !find_file_type(rec, &filetype);
2830
2831         /*
2832          * Try to determine inode type if type not found.
2833          *
2834          * For found regular file extent, it must be FILE.
2835          * For found dir_item/index, it must be DIR.
2836          *
2837          * For undetermined one, use FILE as fallback.
2838          *
2839          * TODO:
2840          * 1. If found backref(inode_index/item is already handled) to it,
2841          *    it must be DIR.
2842          *    Need new inode-inode ref structure to allow search for that.
2843          */
2844         if (!type_recovered) {
2845                 if (rec->found_file_extent &&
2846                     find_normal_file_extent(root, rec->ino)) {
2847                         type_recovered = 1;
2848                         filetype = BTRFS_FT_REG_FILE;
2849                 } else if (rec->found_dir_item) {
2850                         type_recovered = 1;
2851                         filetype = BTRFS_FT_DIR;
2852                 } else if (!list_empty(&rec->orphan_extents)) {
2853                         type_recovered = 1;
2854                         filetype = BTRFS_FT_REG_FILE;
2855                 } else{
2856                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2857                                rec->ino);
2858                         type_recovered = 1;
2859                         filetype = BTRFS_FT_REG_FILE;
2860                 }
2861         }
2862
2863         ret = btrfs_new_inode(trans, root, rec->ino,
2864                               mode | btrfs_type_to_imode(filetype));
2865         if (ret < 0)
2866                 goto out;
2867
2868         /*
2869          * Here inode rebuild is done, we only rebuild the inode item,
2870          * don't repair the nlink(like move to lost+found).
2871          * That is the job of nlink repair.
2872          *
2873          * We just fill the record and return
2874          */
2875         rec->found_dir_item = 1;
2876         rec->imode = mode | btrfs_type_to_imode(filetype);
2877         rec->nlink = 0;
2878         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2879         /* Ensure the inode_nlinks repair function will be called */
2880         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2881 out:
2882         return ret;
2883 }
2884
2885 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2886                                       struct btrfs_root *root,
2887                                       struct btrfs_path *path,
2888                                       struct inode_record *rec)
2889 {
2890         struct orphan_data_extent *orphan;
2891         struct orphan_data_extent *tmp;
2892         int ret = 0;
2893
2894         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2895                 /*
2896                  * Check for conflicting file extents
2897                  *
2898                  * Here we don't know whether the extents is compressed or not,
2899                  * so we can only assume it not compressed nor data offset,
2900                  * and use its disk_len as extent length.
2901                  */
2902                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2903                                        orphan->offset, orphan->disk_len, 0);
2904                 btrfs_release_path(path);
2905                 if (ret < 0)
2906                         goto out;
2907                 if (!ret) {
2908                         fprintf(stderr,
2909                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2910                                 orphan->disk_bytenr, orphan->disk_len);
2911                         ret = btrfs_free_extent(trans,
2912                                         root->fs_info->extent_root,
2913                                         orphan->disk_bytenr, orphan->disk_len,
2914                                         0, root->objectid, orphan->objectid,
2915                                         orphan->offset);
2916                         if (ret < 0)
2917                                 goto out;
2918                 }
2919                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2920                                 orphan->offset, orphan->disk_bytenr,
2921                                 orphan->disk_len, orphan->disk_len);
2922                 if (ret < 0)
2923                         goto out;
2924
2925                 /* Update file size info */
2926                 rec->found_size += orphan->disk_len;
2927                 if (rec->found_size == rec->nbytes)
2928                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2929
2930                 /* Update the file extent hole info too */
2931                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2932                                            orphan->disk_len);
2933                 if (ret < 0)
2934                         goto out;
2935                 if (RB_EMPTY_ROOT(&rec->holes))
2936                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2937
2938                 list_del(&orphan->list);
2939                 free(orphan);
2940         }
2941         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2942 out:
2943         return ret;
2944 }
2945
2946 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2947                                         struct btrfs_root *root,
2948                                         struct btrfs_path *path,
2949                                         struct inode_record *rec)
2950 {
2951         struct rb_node *node;
2952         struct file_extent_hole *hole;
2953         int found = 0;
2954         int ret = 0;
2955
2956         node = rb_first(&rec->holes);
2957
2958         while (node) {
2959                 found = 1;
2960                 hole = rb_entry(node, struct file_extent_hole, node);
2961                 ret = btrfs_punch_hole(trans, root, rec->ino,
2962                                        hole->start, hole->len);
2963                 if (ret < 0)
2964                         goto out;
2965                 ret = del_file_extent_hole(&rec->holes, hole->start,
2966                                            hole->len);
2967                 if (ret < 0)
2968                         goto out;
2969                 if (RB_EMPTY_ROOT(&rec->holes))
2970                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2971                 node = rb_first(&rec->holes);
2972         }
2973         /* special case for a file losing all its file extent */
2974         if (!found) {
2975                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2976                                        round_up(rec->isize, root->sectorsize));
2977                 if (ret < 0)
2978                         goto out;
2979         }
2980         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2981                rec->ino, root->objectid);
2982 out:
2983         return ret;
2984 }
2985
2986 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2987 {
2988         struct btrfs_trans_handle *trans;
2989         struct btrfs_path *path;
2990         int ret = 0;
2991
2992         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2993                              I_ERR_NO_ORPHAN_ITEM |
2994                              I_ERR_LINK_COUNT_WRONG |
2995                              I_ERR_NO_INODE_ITEM |
2996                              I_ERR_FILE_EXTENT_ORPHAN |
2997                              I_ERR_FILE_EXTENT_DISCOUNT|
2998                              I_ERR_FILE_NBYTES_WRONG)))
2999                 return rec->errors;
3000
3001         path = btrfs_alloc_path();
3002         if (!path)
3003                 return -ENOMEM;
3004
3005         /*
3006          * For nlink repair, it may create a dir and add link, so
3007          * 2 for parent(256)'s dir_index and dir_item
3008          * 2 for lost+found dir's inode_item and inode_ref
3009          * 1 for the new inode_ref of the file
3010          * 2 for lost+found dir's dir_index and dir_item for the file
3011          */
3012         trans = btrfs_start_transaction(root, 7);
3013         if (IS_ERR(trans)) {
3014                 btrfs_free_path(path);
3015                 return PTR_ERR(trans);
3016         }
3017
3018         if (rec->errors & I_ERR_NO_INODE_ITEM)
3019                 ret = repair_inode_no_item(trans, root, path, rec);
3020         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3021                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3022         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3023                 ret = repair_inode_discount_extent(trans, root, path, rec);
3024         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3025                 ret = repair_inode_isize(trans, root, path, rec);
3026         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3027                 ret = repair_inode_orphan_item(trans, root, path, rec);
3028         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3029                 ret = repair_inode_nlinks(trans, root, path, rec);
3030         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3031                 ret = repair_inode_nbytes(trans, root, path, rec);
3032         btrfs_commit_transaction(trans, root);
3033         btrfs_free_path(path);
3034         return ret;
3035 }
3036
3037 static int check_inode_recs(struct btrfs_root *root,
3038                             struct cache_tree *inode_cache)
3039 {
3040         struct cache_extent *cache;
3041         struct ptr_node *node;
3042         struct inode_record *rec;
3043         struct inode_backref *backref;
3044         int stage = 0;
3045         int ret = 0;
3046         int err = 0;
3047         u64 error = 0;
3048         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3049
3050         if (btrfs_root_refs(&root->root_item) == 0) {
3051                 if (!cache_tree_empty(inode_cache))
3052                         fprintf(stderr, "warning line %d\n", __LINE__);
3053                 return 0;
3054         }
3055
3056         /*
3057          * We need to record the highest inode number for later 'lost+found'
3058          * dir creation.
3059          * We must select an ino not used/referred by any existing inode, or
3060          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3061          * this may cause 'lost+found' dir has wrong nlinks.
3062          */
3063         cache = last_cache_extent(inode_cache);
3064         if (cache) {
3065                 node = container_of(cache, struct ptr_node, cache);
3066                 rec = node->data;
3067                 if (rec->ino > root->highest_inode)
3068                         root->highest_inode = rec->ino;
3069         }
3070
3071         /*
3072          * We need to repair backrefs first because we could change some of the
3073          * errors in the inode recs.
3074          *
3075          * We also need to go through and delete invalid backrefs first and then
3076          * add the correct ones second.  We do this because we may get EEXIST
3077          * when adding back the correct index because we hadn't yet deleted the
3078          * invalid index.
3079          *
3080          * For example, if we were missing a dir index then the directories
3081          * isize would be wrong, so if we fixed the isize to what we thought it
3082          * would be and then fixed the backref we'd still have a invalid fs, so
3083          * we need to add back the dir index and then check to see if the isize
3084          * is still wrong.
3085          */
3086         while (stage < 3) {
3087                 stage++;
3088                 if (stage == 3 && !err)
3089                         break;
3090
3091                 cache = search_cache_extent(inode_cache, 0);
3092                 while (repair && cache) {
3093                         node = container_of(cache, struct ptr_node, cache);
3094                         rec = node->data;
3095                         cache = next_cache_extent(cache);
3096
3097                         /* Need to free everything up and rescan */
3098                         if (stage == 3) {
3099                                 remove_cache_extent(inode_cache, &node->cache);
3100                                 free(node);
3101                                 free_inode_rec(rec);
3102                                 continue;
3103                         }
3104
3105                         if (list_empty(&rec->backrefs))
3106                                 continue;
3107
3108                         ret = repair_inode_backrefs(root, rec, inode_cache,
3109                                                     stage == 1);
3110                         if (ret < 0) {
3111                                 err = ret;
3112                                 stage = 2;
3113                                 break;
3114                         } if (ret > 0) {
3115                                 err = -EAGAIN;
3116                         }
3117                 }
3118         }
3119         if (err)
3120                 return err;
3121
3122         rec = get_inode_rec(inode_cache, root_dirid, 0);
3123         BUG_ON(IS_ERR(rec));
3124         if (rec) {
3125                 ret = check_root_dir(rec);
3126                 if (ret) {
3127                         fprintf(stderr, "root %llu root dir %llu error\n",
3128                                 (unsigned long long)root->root_key.objectid,
3129                                 (unsigned long long)root_dirid);
3130                         print_inode_error(root, rec);
3131                         error++;
3132                 }
3133         } else {
3134                 if (repair) {
3135                         struct btrfs_trans_handle *trans;
3136
3137                         trans = btrfs_start_transaction(root, 1);
3138                         if (IS_ERR(trans)) {
3139                                 err = PTR_ERR(trans);
3140                                 return err;
3141                         }
3142
3143                         fprintf(stderr,
3144                                 "root %llu missing its root dir, recreating\n",
3145                                 (unsigned long long)root->objectid);
3146
3147                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3148                         BUG_ON(ret);
3149
3150                         btrfs_commit_transaction(trans, root);
3151                         return -EAGAIN;
3152                 }
3153
3154                 fprintf(stderr, "root %llu root dir %llu not found\n",
3155                         (unsigned long long)root->root_key.objectid,
3156                         (unsigned long long)root_dirid);
3157         }
3158
3159         while (1) {
3160                 cache = search_cache_extent(inode_cache, 0);
3161                 if (!cache)
3162                         break;
3163                 node = container_of(cache, struct ptr_node, cache);
3164                 rec = node->data;
3165                 remove_cache_extent(inode_cache, &node->cache);
3166                 free(node);
3167                 if (rec->ino == root_dirid ||
3168                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3169                         free_inode_rec(rec);
3170                         continue;
3171                 }
3172
3173                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3174                         ret = check_orphan_item(root, rec->ino);
3175                         if (ret == 0)
3176                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3177                         if (can_free_inode_rec(rec)) {
3178                                 free_inode_rec(rec);
3179                                 continue;
3180                         }
3181                 }
3182
3183                 if (!rec->found_inode_item)
3184                         rec->errors |= I_ERR_NO_INODE_ITEM;
3185                 if (rec->found_link != rec->nlink)
3186                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3187                 if (repair) {
3188                         ret = try_repair_inode(root, rec);
3189                         if (ret == 0 && can_free_inode_rec(rec)) {
3190                                 free_inode_rec(rec);
3191                                 continue;
3192                         }
3193                         ret = 0;
3194                 }
3195
3196                 if (!(repair && ret == 0))
3197                         error++;
3198                 print_inode_error(root, rec);
3199                 list_for_each_entry(backref, &rec->backrefs, list) {
3200                         if (!backref->found_dir_item)
3201                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3202                         if (!backref->found_dir_index)
3203                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3204                         if (!backref->found_inode_ref)
3205                                 backref->errors |= REF_ERR_NO_INODE_REF;
3206                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3207                                 " namelen %u name %s filetype %d errors %x",
3208                                 (unsigned long long)backref->dir,
3209                                 (unsigned long long)backref->index,
3210                                 backref->namelen, backref->name,
3211                                 backref->filetype, backref->errors);
3212                         print_ref_error(backref->errors);
3213                 }
3214                 free_inode_rec(rec);
3215         }
3216         return (error > 0) ? -1 : 0;
3217 }
3218
3219 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3220                                         u64 objectid)
3221 {
3222         struct cache_extent *cache;
3223         struct root_record *rec = NULL;
3224         int ret;
3225
3226         cache = lookup_cache_extent(root_cache, objectid, 1);
3227         if (cache) {
3228                 rec = container_of(cache, struct root_record, cache);
3229         } else {
3230                 rec = calloc(1, sizeof(*rec));
3231                 if (!rec)
3232                         return ERR_PTR(-ENOMEM);
3233                 rec->objectid = objectid;
3234                 INIT_LIST_HEAD(&rec->backrefs);
3235                 rec->cache.start = objectid;
3236                 rec->cache.size = 1;
3237
3238                 ret = insert_cache_extent(root_cache, &rec->cache);
3239                 if (ret)
3240                         return ERR_PTR(-EEXIST);
3241         }
3242         return rec;
3243 }
3244
3245 static struct root_backref *get_root_backref(struct root_record *rec,
3246                                              u64 ref_root, u64 dir, u64 index,
3247                                              const char *name, int namelen)
3248 {
3249         struct root_backref *backref;
3250
3251         list_for_each_entry(backref, &rec->backrefs, list) {
3252                 if (backref->ref_root != ref_root || backref->dir != dir ||
3253                     backref->namelen != namelen)
3254                         continue;
3255                 if (memcmp(name, backref->name, namelen))
3256                         continue;
3257                 return backref;
3258         }
3259
3260         backref = calloc(1, sizeof(*backref) + namelen + 1);
3261         if (!backref)
3262                 return NULL;
3263         backref->ref_root = ref_root;
3264         backref->dir = dir;
3265         backref->index = index;
3266         backref->namelen = namelen;
3267         memcpy(backref->name, name, namelen);
3268         backref->name[namelen] = '\0';
3269         list_add_tail(&backref->list, &rec->backrefs);
3270         return backref;
3271 }
3272
3273 static void free_root_record(struct cache_extent *cache)
3274 {
3275         struct root_record *rec;
3276         struct root_backref *backref;
3277
3278         rec = container_of(cache, struct root_record, cache);
3279         while (!list_empty(&rec->backrefs)) {
3280                 backref = to_root_backref(rec->backrefs.next);
3281                 list_del(&backref->list);
3282                 free(backref);
3283         }
3284
3285         kfree(rec);
3286 }
3287
3288 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3289
3290 static int add_root_backref(struct cache_tree *root_cache,
3291                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3292                             const char *name, int namelen,
3293                             int item_type, int errors)
3294 {
3295         struct root_record *rec;
3296         struct root_backref *backref;
3297
3298         rec = get_root_rec(root_cache, root_id);
3299         BUG_ON(IS_ERR(rec));
3300         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3301         BUG_ON(!backref);
3302
3303         backref->errors |= errors;
3304
3305         if (item_type != BTRFS_DIR_ITEM_KEY) {
3306                 if (backref->found_dir_index || backref->found_back_ref ||
3307                     backref->found_forward_ref) {
3308                         if (backref->index != index)
3309                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3310                 } else {
3311                         backref->index = index;
3312                 }
3313         }
3314
3315         if (item_type == BTRFS_DIR_ITEM_KEY) {
3316                 if (backref->found_forward_ref)
3317                         rec->found_ref++;
3318                 backref->found_dir_item = 1;
3319         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3320                 backref->found_dir_index = 1;
3321         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3322                 if (backref->found_forward_ref)
3323                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3324                 else if (backref->found_dir_item)
3325                         rec->found_ref++;
3326                 backref->found_forward_ref = 1;
3327         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3328                 if (backref->found_back_ref)
3329                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3330                 backref->found_back_ref = 1;
3331         } else {
3332                 BUG_ON(1);
3333         }
3334
3335         if (backref->found_forward_ref && backref->found_dir_item)
3336                 backref->reachable = 1;
3337         return 0;
3338 }
3339
3340 static int merge_root_recs(struct btrfs_root *root,
3341                            struct cache_tree *src_cache,
3342                            struct cache_tree *dst_cache)
3343 {
3344         struct cache_extent *cache;
3345         struct ptr_node *node;
3346         struct inode_record *rec;
3347         struct inode_backref *backref;
3348         int ret = 0;
3349
3350         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3351                 free_inode_recs_tree(src_cache);
3352                 return 0;
3353         }
3354
3355         while (1) {
3356                 cache = search_cache_extent(src_cache, 0);
3357                 if (!cache)
3358                         break;
3359                 node = container_of(cache, struct ptr_node, cache);
3360                 rec = node->data;
3361                 remove_cache_extent(src_cache, &node->cache);
3362                 free(node);
3363
3364                 ret = is_child_root(root, root->objectid, rec->ino);
3365                 if (ret < 0)
3366                         break;
3367                 else if (ret == 0)
3368                         goto skip;
3369
3370                 list_for_each_entry(backref, &rec->backrefs, list) {
3371                         BUG_ON(backref->found_inode_ref);
3372                         if (backref->found_dir_item)
3373                                 add_root_backref(dst_cache, rec->ino,
3374                                         root->root_key.objectid, backref->dir,
3375                                         backref->index, backref->name,
3376                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3377                                         backref->errors);
3378                         if (backref->found_dir_index)
3379                                 add_root_backref(dst_cache, rec->ino,
3380                                         root->root_key.objectid, backref->dir,
3381                                         backref->index, backref->name,
3382                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3383                                         backref->errors);
3384                 }
3385 skip:
3386                 free_inode_rec(rec);
3387         }
3388         if (ret < 0)
3389                 return ret;
3390         return 0;
3391 }
3392
3393 static int check_root_refs(struct btrfs_root *root,
3394                            struct cache_tree *root_cache)
3395 {
3396         struct root_record *rec;
3397         struct root_record *ref_root;
3398         struct root_backref *backref;
3399         struct cache_extent *cache;
3400         int loop = 1;
3401         int ret;
3402         int error;
3403         int errors = 0;
3404
3405         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3406         BUG_ON(IS_ERR(rec));
3407         rec->found_ref = 1;
3408
3409         /* fixme: this can not detect circular references */
3410         while (loop) {
3411                 loop = 0;
3412                 cache = search_cache_extent(root_cache, 0);
3413                 while (1) {
3414                         if (!cache)
3415                                 break;
3416                         rec = container_of(cache, struct root_record, cache);
3417                         cache = next_cache_extent(cache);
3418
3419                         if (rec->found_ref == 0)
3420                                 continue;
3421
3422                         list_for_each_entry(backref, &rec->backrefs, list) {
3423                                 if (!backref->reachable)
3424                                         continue;
3425
3426                                 ref_root = get_root_rec(root_cache,
3427                                                         backref->ref_root);
3428                                 BUG_ON(IS_ERR(ref_root));
3429                                 if (ref_root->found_ref > 0)
3430                                         continue;
3431
3432                                 backref->reachable = 0;
3433                                 rec->found_ref--;
3434                                 if (rec->found_ref == 0)
3435                                         loop = 1;
3436                         }
3437                 }
3438         }
3439
3440         cache = search_cache_extent(root_cache, 0);
3441         while (1) {
3442                 if (!cache)
3443                         break;
3444                 rec = container_of(cache, struct root_record, cache);
3445                 cache = next_cache_extent(cache);
3446
3447                 if (rec->found_ref == 0 &&
3448                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3449                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3450                         ret = check_orphan_item(root->fs_info->tree_root,
3451                                                 rec->objectid);
3452                         if (ret == 0)
3453                                 continue;
3454
3455                         /*
3456                          * If we don't have a root item then we likely just have
3457                          * a dir item in a snapshot for this root but no actual
3458                          * ref key or anything so it's meaningless.
3459                          */
3460                         if (!rec->found_root_item)
3461                                 continue;
3462                         errors++;
3463                         fprintf(stderr, "fs tree %llu not referenced\n",
3464                                 (unsigned long long)rec->objectid);
3465                 }
3466
3467                 error = 0;
3468                 if (rec->found_ref > 0 && !rec->found_root_item)
3469                         error = 1;
3470                 list_for_each_entry(backref, &rec->backrefs, list) {
3471                         if (!backref->found_dir_item)
3472                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3473                         if (!backref->found_dir_index)
3474                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3475                         if (!backref->found_back_ref)
3476                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3477                         if (!backref->found_forward_ref)
3478                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3479                         if (backref->reachable && backref->errors)
3480                                 error = 1;
3481                 }
3482                 if (!error)
3483                         continue;
3484
3485                 errors++;
3486                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3487                         (unsigned long long)rec->objectid, rec->found_ref,
3488                          rec->found_root_item ? "" : "not found");
3489
3490                 list_for_each_entry(backref, &rec->backrefs, list) {
3491                         if (!backref->reachable)
3492                                 continue;
3493                         if (!backref->errors && rec->found_root_item)
3494                                 continue;
3495                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3496                                 " index %llu namelen %u name %s errors %x\n",
3497                                 (unsigned long long)backref->ref_root,
3498                                 (unsigned long long)backref->dir,
3499                                 (unsigned long long)backref->index,
3500                                 backref->namelen, backref->name,
3501                                 backref->errors);
3502                         print_ref_error(backref->errors);
3503                 }
3504         }
3505         return errors > 0 ? 1 : 0;
3506 }
3507
3508 static int process_root_ref(struct extent_buffer *eb, int slot,
3509                             struct btrfs_key *key,
3510                             struct cache_tree *root_cache)
3511 {
3512         u64 dirid;
3513         u64 index;
3514         u32 len;
3515         u32 name_len;
3516         struct btrfs_root_ref *ref;
3517         char namebuf[BTRFS_NAME_LEN];
3518         int error;
3519
3520         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3521
3522         dirid = btrfs_root_ref_dirid(eb, ref);
3523         index = btrfs_root_ref_sequence(eb, ref);
3524         name_len = btrfs_root_ref_name_len(eb, ref);
3525
3526         if (name_len <= BTRFS_NAME_LEN) {
3527                 len = name_len;
3528                 error = 0;
3529         } else {
3530                 len = BTRFS_NAME_LEN;
3531                 error = REF_ERR_NAME_TOO_LONG;
3532         }
3533         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3534
3535         if (key->type == BTRFS_ROOT_REF_KEY) {
3536                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3537                                  index, namebuf, len, key->type, error);
3538         } else {
3539                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3540                                  index, namebuf, len, key->type, error);
3541         }
3542         return 0;
3543 }
3544
3545 static void free_corrupt_block(struct cache_extent *cache)
3546 {
3547         struct btrfs_corrupt_block *corrupt;
3548
3549         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3550         free(corrupt);
3551 }
3552
3553 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3554
3555 /*
3556  * Repair the btree of the given root.
3557  *
3558  * The fix is to remove the node key in corrupt_blocks cache_tree.
3559  * and rebalance the tree.
3560  * After the fix, the btree should be writeable.
3561  */
3562 static int repair_btree(struct btrfs_root *root,
3563                         struct cache_tree *corrupt_blocks)
3564 {
3565         struct btrfs_trans_handle *trans;
3566         struct btrfs_path *path;
3567         struct btrfs_corrupt_block *corrupt;
3568         struct cache_extent *cache;
3569         struct btrfs_key key;
3570         u64 offset;
3571         int level;
3572         int ret = 0;
3573
3574         if (cache_tree_empty(corrupt_blocks))
3575                 return 0;
3576
3577         path = btrfs_alloc_path();
3578         if (!path)
3579                 return -ENOMEM;
3580
3581         trans = btrfs_start_transaction(root, 1);
3582         if (IS_ERR(trans)) {
3583                 ret = PTR_ERR(trans);
3584                 fprintf(stderr, "Error starting transaction: %s\n",
3585                         strerror(-ret));
3586                 goto out_free_path;
3587         }
3588         cache = first_cache_extent(corrupt_blocks);
3589         while (cache) {
3590                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3591                                        cache);
3592                 level = corrupt->level;
3593                 path->lowest_level = level;
3594                 key.objectid = corrupt->key.objectid;
3595                 key.type = corrupt->key.type;
3596                 key.offset = corrupt->key.offset;
3597
3598                 /*
3599                  * Here we don't want to do any tree balance, since it may
3600                  * cause a balance with corrupted brother leaf/node,
3601                  * so ins_len set to 0 here.
3602                  * Balance will be done after all corrupt node/leaf is deleted.
3603                  */
3604                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3605                 if (ret < 0)
3606                         goto out;
3607                 offset = btrfs_node_blockptr(path->nodes[level],
3608                                              path->slots[level]);
3609
3610                 /* Remove the ptr */
3611                 ret = btrfs_del_ptr(trans, root, path, level,
3612                                     path->slots[level]);
3613                 if (ret < 0)
3614                         goto out;
3615                 /*
3616                  * Remove the corresponding extent
3617                  * return value is not concerned.
3618                  */
3619                 btrfs_release_path(path);
3620                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3621                                         0, root->root_key.objectid,
3622                                         level - 1, 0);
3623                 cache = next_cache_extent(cache);
3624         }
3625
3626         /* Balance the btree using btrfs_search_slot() */
3627         cache = first_cache_extent(corrupt_blocks);
3628         while (cache) {
3629                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3630                                        cache);
3631                 memcpy(&key, &corrupt->key, sizeof(key));
3632                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3633                 if (ret < 0)
3634                         goto out;
3635                 /* return will always >0 since it won't find the item */
3636                 ret = 0;
3637                 btrfs_release_path(path);
3638                 cache = next_cache_extent(cache);
3639         }
3640 out:
3641         btrfs_commit_transaction(trans, root);
3642 out_free_path:
3643         btrfs_free_path(path);
3644         return ret;
3645 }
3646
3647 static int check_fs_root(struct btrfs_root *root,
3648                          struct cache_tree *root_cache,
3649                          struct walk_control *wc)
3650 {
3651         int ret = 0;
3652         int err = 0;
3653         int wret;
3654         int level;
3655         struct btrfs_path path;
3656         struct shared_node root_node;
3657         struct root_record *rec;
3658         struct btrfs_root_item *root_item = &root->root_item;
3659         struct cache_tree corrupt_blocks;
3660         struct orphan_data_extent *orphan;
3661         struct orphan_data_extent *tmp;
3662         enum btrfs_tree_block_status status;
3663         struct node_refs nrefs;
3664
3665         /*
3666          * Reuse the corrupt_block cache tree to record corrupted tree block
3667          *
3668          * Unlike the usage in extent tree check, here we do it in a per
3669          * fs/subvol tree base.
3670          */
3671         cache_tree_init(&corrupt_blocks);
3672         root->fs_info->corrupt_blocks = &corrupt_blocks;
3673
3674         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3675                 rec = get_root_rec(root_cache, root->root_key.objectid);
3676                 BUG_ON(IS_ERR(rec));
3677                 if (btrfs_root_refs(root_item) > 0)
3678                         rec->found_root_item = 1;
3679         }
3680
3681         btrfs_init_path(&path);
3682         memset(&root_node, 0, sizeof(root_node));
3683         cache_tree_init(&root_node.root_cache);
3684         cache_tree_init(&root_node.inode_cache);
3685         memset(&nrefs, 0, sizeof(nrefs));
3686
3687         /* Move the orphan extent record to corresponding inode_record */
3688         list_for_each_entry_safe(orphan, tmp,
3689                                  &root->orphan_data_extents, list) {
3690                 struct inode_record *inode;
3691
3692                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3693                                       1);
3694                 BUG_ON(IS_ERR(inode));
3695                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3696                 list_move(&orphan->list, &inode->orphan_extents);
3697         }
3698
3699         level = btrfs_header_level(root->node);
3700         memset(wc->nodes, 0, sizeof(wc->nodes));
3701         wc->nodes[level] = &root_node;
3702         wc->active_node = level;
3703         wc->root_level = level;
3704
3705         /* We may not have checked the root block, lets do that now */
3706         if (btrfs_is_leaf(root->node))
3707                 status = btrfs_check_leaf(root, NULL, root->node);
3708         else
3709                 status = btrfs_check_node(root, NULL, root->node);
3710         if (status != BTRFS_TREE_BLOCK_CLEAN)
3711                 return -EIO;
3712
3713         if (btrfs_root_refs(root_item) > 0 ||
3714             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3715                 path.nodes[level] = root->node;
3716                 extent_buffer_get(root->node);
3717                 path.slots[level] = 0;
3718         } else {
3719                 struct btrfs_key key;
3720                 struct btrfs_disk_key found_key;
3721
3722                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3723                 level = root_item->drop_level;
3724                 path.lowest_level = level;
3725                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3726                 if (wret < 0)
3727                         goto skip_walking;
3728                 btrfs_node_key(path.nodes[level], &found_key,
3729                                 path.slots[level]);
3730                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3731                                         sizeof(found_key)));
3732         }
3733
3734         while (1) {
3735                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3736                 if (wret < 0)
3737                         ret = wret;
3738                 if (wret != 0)
3739                         break;
3740
3741                 wret = walk_up_tree(root, &path, wc, &level);
3742                 if (wret < 0)
3743                         ret = wret;
3744                 if (wret != 0)
3745                         break;
3746         }
3747 skip_walking:
3748         btrfs_release_path(&path);
3749
3750         if (!cache_tree_empty(&corrupt_blocks)) {
3751                 struct cache_extent *cache;
3752                 struct btrfs_corrupt_block *corrupt;
3753
3754                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3755                        root->root_key.objectid);
3756                 cache = first_cache_extent(&corrupt_blocks);
3757                 while (cache) {
3758                         corrupt = container_of(cache,
3759                                                struct btrfs_corrupt_block,
3760                                                cache);
3761                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3762                                cache->start, corrupt->level,
3763                                corrupt->key.objectid, corrupt->key.type,
3764                                corrupt->key.offset);
3765                         cache = next_cache_extent(cache);
3766                 }
3767                 if (repair) {
3768                         printf("Try to repair the btree for root %llu\n",
3769                                root->root_key.objectid);
3770                         ret = repair_btree(root, &corrupt_blocks);
3771                         if (ret < 0)
3772                                 fprintf(stderr, "Failed to repair btree: %s\n",
3773                                         strerror(-ret));
3774                         if (!ret)
3775                                 printf("Btree for root %llu is fixed\n",
3776                                        root->root_key.objectid);
3777                 }
3778         }
3779
3780         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3781         if (err < 0)
3782                 ret = err;
3783
3784         if (root_node.current) {
3785                 root_node.current->checked = 1;
3786                 maybe_free_inode_rec(&root_node.inode_cache,
3787                                 root_node.current);
3788         }
3789
3790         err = check_inode_recs(root, &root_node.inode_cache);
3791         if (!ret)
3792                 ret = err;
3793
3794         free_corrupt_blocks_tree(&corrupt_blocks);
3795         root->fs_info->corrupt_blocks = NULL;
3796         free_orphan_data_extents(&root->orphan_data_extents);
3797         return ret;
3798 }
3799
3800 static int fs_root_objectid(u64 objectid)
3801 {
3802         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3803             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3804                 return 1;
3805         return is_fstree(objectid);
3806 }
3807
3808 static int check_fs_roots(struct btrfs_root *root,
3809                           struct cache_tree *root_cache)
3810 {
3811         struct btrfs_path path;
3812         struct btrfs_key key;
3813         struct walk_control wc;
3814         struct extent_buffer *leaf, *tree_node;
3815         struct btrfs_root *tmp_root;
3816         struct btrfs_root *tree_root = root->fs_info->tree_root;
3817         int ret;
3818         int err = 0;
3819
3820         if (ctx.progress_enabled) {
3821                 ctx.tp = TASK_FS_ROOTS;
3822                 task_start(ctx.info);
3823         }
3824
3825         /*
3826          * Just in case we made any changes to the extent tree that weren't
3827          * reflected into the free space cache yet.
3828          */
3829         if (repair)
3830                 reset_cached_block_groups(root->fs_info);
3831         memset(&wc, 0, sizeof(wc));
3832         cache_tree_init(&wc.shared);
3833         btrfs_init_path(&path);
3834
3835 again:
3836         key.offset = 0;
3837         key.objectid = 0;
3838         key.type = BTRFS_ROOT_ITEM_KEY;
3839         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3840         if (ret < 0) {
3841                 err = 1;
3842                 goto out;
3843         }
3844         tree_node = tree_root->node;
3845         while (1) {
3846                 if (tree_node != tree_root->node) {
3847                         free_root_recs_tree(root_cache);
3848                         btrfs_release_path(&path);
3849                         goto again;
3850                 }
3851                 leaf = path.nodes[0];
3852                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3853                         ret = btrfs_next_leaf(tree_root, &path);
3854                         if (ret) {
3855                                 if (ret < 0)
3856                                         err = 1;
3857                                 break;
3858                         }
3859                         leaf = path.nodes[0];
3860                 }
3861                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3862                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3863                     fs_root_objectid(key.objectid)) {
3864                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3865                                 tmp_root = btrfs_read_fs_root_no_cache(
3866                                                 root->fs_info, &key);
3867                         } else {
3868                                 key.offset = (u64)-1;
3869                                 tmp_root = btrfs_read_fs_root(
3870                                                 root->fs_info, &key);
3871                         }
3872                         if (IS_ERR(tmp_root)) {
3873                                 err = 1;
3874                                 goto next;
3875                         }
3876                         ret = check_fs_root(tmp_root, root_cache, &wc);
3877                         if (ret == -EAGAIN) {
3878                                 free_root_recs_tree(root_cache);
3879                                 btrfs_release_path(&path);
3880                                 goto again;
3881                         }
3882                         if (ret)
3883                                 err = 1;
3884                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3885                                 btrfs_free_fs_root(tmp_root);
3886                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3887                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3888                         process_root_ref(leaf, path.slots[0], &key,
3889                                          root_cache);
3890                 }
3891 next:
3892                 path.slots[0]++;
3893         }
3894 out:
3895         btrfs_release_path(&path);
3896         if (err)
3897                 free_extent_cache_tree(&wc.shared);
3898         if (!cache_tree_empty(&wc.shared))
3899                 fprintf(stderr, "warning line %d\n", __LINE__);
3900
3901         task_stop(ctx.info);
3902
3903         return err;
3904 }
3905
3906 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3907 {
3908         struct rb_node *n;
3909         struct extent_backref *back;
3910         struct tree_backref *tback;
3911         struct data_backref *dback;
3912         u64 found = 0;
3913         int err = 0;
3914
3915         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3916                 back = rb_node_to_extent_backref(n);
3917                 if (!back->found_extent_tree) {
3918                         err = 1;
3919                         if (!print_errs)
3920                                 goto out;
3921                         if (back->is_data) {
3922                                 dback = to_data_backref(back);
3923                                 fprintf(stderr, "Backref %llu %s %llu"
3924                                         " owner %llu offset %llu num_refs %lu"
3925                                         " not found in extent tree\n",
3926                                         (unsigned long long)rec->start,
3927                                         back->full_backref ?
3928                                         "parent" : "root",
3929                                         back->full_backref ?
3930                                         (unsigned long long)dback->parent:
3931                                         (unsigned long long)dback->root,
3932                                         (unsigned long long)dback->owner,
3933                                         (unsigned long long)dback->offset,
3934                                         (unsigned long)dback->num_refs);
3935                         } else {
3936                                 tback = to_tree_backref(back);
3937                                 fprintf(stderr, "Backref %llu parent %llu"
3938                                         " root %llu not found in extent tree\n",
3939                                         (unsigned long long)rec->start,
3940                                         (unsigned long long)tback->parent,
3941                                         (unsigned long long)tback->root);
3942                         }
3943                 }
3944                 if (!back->is_data && !back->found_ref) {
3945                         err = 1;
3946                         if (!print_errs)
3947                                 goto out;
3948                         tback = to_tree_backref(back);
3949                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3950                                 (unsigned long long)rec->start,
3951                                 back->full_backref ? "parent" : "root",
3952                                 back->full_backref ?
3953                                 (unsigned long long)tback->parent :
3954                                 (unsigned long long)tback->root, back);
3955                 }
3956                 if (back->is_data) {
3957                         dback = to_data_backref(back);
3958                         if (dback->found_ref != dback->num_refs) {
3959                                 err = 1;
3960                                 if (!print_errs)
3961                                         goto out;
3962                                 fprintf(stderr, "Incorrect local backref count"
3963                                         " on %llu %s %llu owner %llu"
3964                                         " offset %llu found %u wanted %u back %p\n",
3965                                         (unsigned long long)rec->start,
3966                                         back->full_backref ?
3967                                         "parent" : "root",
3968                                         back->full_backref ?
3969                                         (unsigned long long)dback->parent:
3970                                         (unsigned long long)dback->root,
3971                                         (unsigned long long)dback->owner,
3972                                         (unsigned long long)dback->offset,
3973                                         dback->found_ref, dback->num_refs, back);
3974                         }
3975                         if (dback->disk_bytenr != rec->start) {
3976                                 err = 1;
3977                                 if (!print_errs)
3978                                         goto out;
3979                                 fprintf(stderr, "Backref disk bytenr does not"
3980                                         " match extent record, bytenr=%llu, "
3981                                         "ref bytenr=%llu\n",
3982                                         (unsigned long long)rec->start,
3983                                         (unsigned long long)dback->disk_bytenr);
3984                         }
3985
3986                         if (dback->bytes != rec->nr) {
3987                                 err = 1;
3988                                 if (!print_errs)
3989                                         goto out;
3990                                 fprintf(stderr, "Backref bytes do not match "
3991                                         "extent backref, bytenr=%llu, ref "
3992                                         "bytes=%llu, backref bytes=%llu\n",
3993                                         (unsigned long long)rec->start,
3994                                         (unsigned long long)rec->nr,
3995                                         (unsigned long long)dback->bytes);
3996                         }
3997                 }
3998                 if (!back->is_data) {
3999                         found += 1;
4000                 } else {
4001                         dback = to_data_backref(back);
4002                         found += dback->found_ref;
4003                 }
4004         }
4005         if (found != rec->refs) {
4006                 err = 1;
4007                 if (!print_errs)
4008                         goto out;
4009                 fprintf(stderr, "Incorrect global backref count "
4010                         "on %llu found %llu wanted %llu\n",
4011                         (unsigned long long)rec->start,
4012                         (unsigned long long)found,
4013                         (unsigned long long)rec->refs);
4014         }
4015 out:
4016         return err;
4017 }
4018
/* Node destructor: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
4025
4026 static void free_all_extent_backrefs(struct extent_record *rec)
4027 {
4028         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4029 }
4030
4031 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4032                                      struct cache_tree *extent_cache)
4033 {
4034         struct cache_extent *cache;
4035         struct extent_record *rec;
4036
4037         while (1) {
4038                 cache = first_cache_extent(extent_cache);
4039                 if (!cache)
4040                         break;
4041                 rec = container_of(cache, struct extent_record, cache);
4042                 remove_cache_extent(extent_cache, cache);
4043                 free_all_extent_backrefs(rec);
4044                 free(rec);
4045         }
4046 }
4047
4048 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4049                                  struct extent_record *rec)
4050 {
4051         if (rec->content_checked && rec->owner_ref_checked &&
4052             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4053             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4054             !rec->bad_full_backref && !rec->crossing_stripes &&
4055             !rec->wrong_chunk_type) {
4056                 remove_cache_extent(extent_cache, &rec->cache);
4057                 free_all_extent_backrefs(rec);
4058                 list_del_init(&rec->list);
4059                 free(rec);
4060         }
4061         return 0;
4062 }
4063
4064 static int check_owner_ref(struct btrfs_root *root,
4065                             struct extent_record *rec,
4066                             struct extent_buffer *buf)
4067 {
4068         struct extent_backref *node, *tmp;
4069         struct tree_backref *back;
4070         struct btrfs_root *ref_root;
4071         struct btrfs_key key;
4072         struct btrfs_path path;
4073         struct extent_buffer *parent;
4074         int level;
4075         int found = 0;
4076         int ret;
4077
4078         rbtree_postorder_for_each_entry_safe(node, tmp,
4079                                              &rec->backref_tree, node) {
4080                 if (node->is_data)
4081                         continue;
4082                 if (!node->found_ref)
4083                         continue;
4084                 if (node->full_backref)
4085                         continue;
4086                 back = to_tree_backref(node);
4087                 if (btrfs_header_owner(buf) == back->root)
4088                         return 0;
4089         }
4090         BUG_ON(rec->is_root);
4091
4092         /* try to find the block by search corresponding fs tree */
4093         key.objectid = btrfs_header_owner(buf);
4094         key.type = BTRFS_ROOT_ITEM_KEY;
4095         key.offset = (u64)-1;
4096
4097         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4098         if (IS_ERR(ref_root))
4099                 return 1;
4100
4101         level = btrfs_header_level(buf);
4102         if (level == 0)
4103                 btrfs_item_key_to_cpu(buf, &key, 0);
4104         else
4105                 btrfs_node_key_to_cpu(buf, &key, 0);
4106
4107         btrfs_init_path(&path);
4108         path.lowest_level = level + 1;
4109         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4110         if (ret < 0)
4111                 return 0;
4112
4113         parent = path.nodes[level + 1];
4114         if (parent && buf->start == btrfs_node_blockptr(parent,
4115                                                         path.slots[level + 1]))
4116                 found = 1;
4117
4118         btrfs_release_path(&path);
4119         return found ? 0 : 1;
4120 }
4121
4122 static int is_extent_tree_record(struct extent_record *rec)
4123 {
4124         struct extent_backref *ref, *tmp;
4125         struct tree_backref *back;
4126         int is_extent = 0;
4127
4128         rbtree_postorder_for_each_entry_safe(ref, tmp,
4129                                              &rec->backref_tree, node) {
4130                 if (ref->is_data)
4131                         return 0;
4132                 back = to_tree_backref(ref);
4133                 if (ref->full_backref)
4134                         return 0;
4135                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4136                         is_extent = 1;
4137         }
4138         return is_extent;
4139 }
4140
4141
4142 static int record_bad_block_io(struct btrfs_fs_info *info,
4143                                struct cache_tree *extent_cache,
4144                                u64 start, u64 len)
4145 {
4146         struct extent_record *rec;
4147         struct cache_extent *cache;
4148         struct btrfs_key key;
4149
4150         cache = lookup_cache_extent(extent_cache, start, len);
4151         if (!cache)
4152                 return 0;
4153
4154         rec = container_of(cache, struct extent_record, cache);
4155         if (!is_extent_tree_record(rec))
4156                 return 0;
4157
4158         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4159         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4160 }
4161
4162 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4163                        struct extent_buffer *buf, int slot)
4164 {
4165         if (btrfs_header_level(buf)) {
4166                 struct btrfs_key_ptr ptr1, ptr2;
4167
4168                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4169                                    sizeof(struct btrfs_key_ptr));
4170                 read_extent_buffer(buf, &ptr2,
4171                                    btrfs_node_key_ptr_offset(slot + 1),
4172                                    sizeof(struct btrfs_key_ptr));
4173                 write_extent_buffer(buf, &ptr1,
4174                                     btrfs_node_key_ptr_offset(slot + 1),
4175                                     sizeof(struct btrfs_key_ptr));
4176                 write_extent_buffer(buf, &ptr2,
4177                                     btrfs_node_key_ptr_offset(slot),
4178                                     sizeof(struct btrfs_key_ptr));
4179                 if (slot == 0) {
4180                         struct btrfs_disk_key key;
4181                         btrfs_node_key(buf, &key, 0);
4182                         btrfs_fixup_low_keys(root, path, &key,
4183                                              btrfs_header_level(buf) + 1);
4184                 }
4185         } else {
4186                 struct btrfs_item *item1, *item2;
4187                 struct btrfs_key k1, k2;
4188                 char *item1_data, *item2_data;
4189                 u32 item1_offset, item2_offset, item1_size, item2_size;
4190
4191                 item1 = btrfs_item_nr(slot);
4192                 item2 = btrfs_item_nr(slot + 1);
4193                 btrfs_item_key_to_cpu(buf, &k1, slot);
4194                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4195                 item1_offset = btrfs_item_offset(buf, item1);
4196                 item2_offset = btrfs_item_offset(buf, item2);
4197                 item1_size = btrfs_item_size(buf, item1);
4198                 item2_size = btrfs_item_size(buf, item2);
4199
4200                 item1_data = malloc(item1_size);
4201                 if (!item1_data)
4202                         return -ENOMEM;
4203                 item2_data = malloc(item2_size);
4204                 if (!item2_data) {
4205                         free(item1_data);
4206                         return -ENOMEM;
4207                 }
4208
4209                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4210                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4211
4212                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4213                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4214                 free(item1_data);
4215                 free(item2_data);
4216
4217                 btrfs_set_item_offset(buf, item1, item2_offset);
4218                 btrfs_set_item_offset(buf, item2, item1_offset);
4219                 btrfs_set_item_size(buf, item1, item2_size);
4220                 btrfs_set_item_size(buf, item2, item1_size);
4221
4222                 path->slots[0] = slot;
4223                 btrfs_set_item_key_unsafe(root, path, &k2);
4224                 path->slots[0] = slot + 1;
4225                 btrfs_set_item_key_unsafe(root, path, &k1);
4226         }
4227         return 0;
4228 }
4229
4230 static int fix_key_order(struct btrfs_trans_handle *trans,
4231                          struct btrfs_root *root,
4232                          struct btrfs_path *path)
4233 {
4234         struct extent_buffer *buf;
4235         struct btrfs_key k1, k2;
4236         int i;
4237         int level = path->lowest_level;
4238         int ret = -EIO;
4239
4240         buf = path->nodes[level];
4241         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4242                 if (level) {
4243                         btrfs_node_key_to_cpu(buf, &k1, i);
4244                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4245                 } else {
4246                         btrfs_item_key_to_cpu(buf, &k1, i);
4247                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4248                 }
4249                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4250                         continue;
4251                 ret = swap_values(root, path, buf, i);
4252                 if (ret)
4253                         break;
4254                 btrfs_mark_buffer_dirty(buf);
4255                 i = 0;
4256         }
4257         return ret;
4258 }
4259
4260 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4261                              struct btrfs_root *root,
4262                              struct btrfs_path *path,
4263                              struct extent_buffer *buf, int slot)
4264 {
4265         struct btrfs_key key;
4266         int nritems = btrfs_header_nritems(buf);
4267
4268         btrfs_item_key_to_cpu(buf, &key, slot);
4269
4270         /* These are all the keys we can deal with missing. */
4271         if (key.type != BTRFS_DIR_INDEX_KEY &&
4272             key.type != BTRFS_EXTENT_ITEM_KEY &&
4273             key.type != BTRFS_METADATA_ITEM_KEY &&
4274             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4275             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4276                 return -1;
4277
4278         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4279                (unsigned long long)key.objectid, key.type,
4280                (unsigned long long)key.offset, slot, buf->start);
4281         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4282                               btrfs_item_nr_offset(slot + 1),
4283                               sizeof(struct btrfs_item) *
4284                               (nritems - slot - 1));
4285         btrfs_set_header_nritems(buf, nritems - 1);
4286         if (slot == 0) {
4287                 struct btrfs_disk_key disk_key;
4288
4289                 btrfs_item_key(buf, &disk_key, 0);
4290                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4291         }
4292         btrfs_mark_buffer_dirty(buf);
4293         return 0;
4294 }
4295
4296 static int fix_item_offset(struct btrfs_trans_handle *trans,
4297                            struct btrfs_root *root,
4298                            struct btrfs_path *path)
4299 {
4300         struct extent_buffer *buf;
4301         int i;
4302         int ret = 0;
4303
4304         /* We should only get this for leaves */
4305         BUG_ON(path->lowest_level);
4306         buf = path->nodes[0];
4307 again:
4308         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4309                 unsigned int shift = 0, offset;
4310
4311                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4312                     BTRFS_LEAF_DATA_SIZE(root)) {
4313                         if (btrfs_item_end_nr(buf, i) >
4314                             BTRFS_LEAF_DATA_SIZE(root)) {
4315                                 ret = delete_bogus_item(trans, root, path,
4316                                                         buf, i);
4317                                 if (!ret)
4318                                         goto again;
4319                                 fprintf(stderr, "item is off the end of the "
4320                                         "leaf, can't fix\n");
4321                                 ret = -EIO;
4322                                 break;
4323                         }
4324                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4325                                 btrfs_item_end_nr(buf, i);
4326                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4327                            btrfs_item_offset_nr(buf, i - 1)) {
4328                         if (btrfs_item_end_nr(buf, i) >
4329                             btrfs_item_offset_nr(buf, i - 1)) {
4330                                 ret = delete_bogus_item(trans, root, path,
4331                                                         buf, i);
4332                                 if (!ret)
4333                                         goto again;
4334                                 fprintf(stderr, "items overlap, can't fix\n");
4335                                 ret = -EIO;
4336                                 break;
4337                         }
4338                         shift = btrfs_item_offset_nr(buf, i - 1) -
4339                                 btrfs_item_end_nr(buf, i);
4340                 }
4341                 if (!shift)
4342                         continue;
4343
4344                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4345                        i, shift, (unsigned long long)buf->start);
4346                 offset = btrfs_item_offset_nr(buf, i);
4347                 memmove_extent_buffer(buf,
4348                                       btrfs_leaf_data(buf) + offset + shift,
4349                                       btrfs_leaf_data(buf) + offset,
4350                                       btrfs_item_size_nr(buf, i));
4351                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4352                                       offset + shift);
4353                 btrfs_mark_buffer_dirty(buf);
4354         }
4355
4356         /*
4357          * We may have moved things, in which case we want to exit so we don't
4358          * write those changes out.  Once we have proper abort functionality in
4359          * progs this can be changed to something nicer.
4360          */
4361         BUG_ON(ret);
4362         return ret;
4363 }
4364
4365 /*
4366  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4367  * then just return -EIO.
4368  */
4369 static int try_to_fix_bad_block(struct btrfs_root *root,
4370                                 struct extent_buffer *buf,
4371                                 enum btrfs_tree_block_status status)
4372 {
4373         struct btrfs_trans_handle *trans;
4374         struct ulist *roots;
4375         struct ulist_node *node;
4376         struct btrfs_root *search_root;
4377         struct btrfs_path *path;
4378         struct ulist_iterator iter;
4379         struct btrfs_key root_key, key;
4380         int ret;
4381
4382         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4383             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4384                 return -EIO;
4385
4386         path = btrfs_alloc_path();
4387         if (!path)
4388                 return -EIO;
4389
4390         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4391                                    0, &roots);
4392         if (ret) {
4393                 btrfs_free_path(path);
4394                 return -EIO;
4395         }
4396
4397         ULIST_ITER_INIT(&iter);
4398         while ((node = ulist_next(roots, &iter))) {
4399                 root_key.objectid = node->val;
4400                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4401                 root_key.offset = (u64)-1;
4402
4403                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4404                 if (IS_ERR(root)) {
4405                         ret = -EIO;
4406                         break;
4407                 }
4408
4409
4410                 trans = btrfs_start_transaction(search_root, 0);
4411                 if (IS_ERR(trans)) {
4412                         ret = PTR_ERR(trans);
4413                         break;
4414                 }
4415
4416                 path->lowest_level = btrfs_header_level(buf);
4417                 path->skip_check_block = 1;
4418                 if (path->lowest_level)
4419                         btrfs_node_key_to_cpu(buf, &key, 0);
4420                 else
4421                         btrfs_item_key_to_cpu(buf, &key, 0);
4422                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4423                 if (ret) {
4424                         ret = -EIO;
4425                         btrfs_commit_transaction(trans, search_root);
4426                         break;
4427                 }
4428                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4429                         ret = fix_key_order(trans, search_root, path);
4430                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4431                         ret = fix_item_offset(trans, search_root, path);
4432                 if (ret) {
4433                         btrfs_commit_transaction(trans, search_root);
4434                         break;
4435                 }
4436                 btrfs_release_path(path);
4437                 btrfs_commit_transaction(trans, search_root);
4438         }
4439         ulist_free(roots);
4440         btrfs_free_path(path);
4441         return ret;
4442 }
4443
4444 static int check_block(struct btrfs_root *root,
4445                        struct cache_tree *extent_cache,
4446                        struct extent_buffer *buf, u64 flags)
4447 {
4448         struct extent_record *rec;
4449         struct cache_extent *cache;
4450         struct btrfs_key key;
4451         enum btrfs_tree_block_status status;
4452         int ret = 0;
4453         int level;
4454
4455         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4456         if (!cache)
4457                 return 1;
4458         rec = container_of(cache, struct extent_record, cache);
4459         rec->generation = btrfs_header_generation(buf);
4460
4461         level = btrfs_header_level(buf);
4462         if (btrfs_header_nritems(buf) > 0) {
4463
4464                 if (level == 0)
4465                         btrfs_item_key_to_cpu(buf, &key, 0);
4466                 else
4467                         btrfs_node_key_to_cpu(buf, &key, 0);
4468
4469                 rec->info_objectid = key.objectid;
4470         }
4471         rec->info_level = level;
4472
4473         if (btrfs_is_leaf(buf))
4474                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4475         else
4476                 status = btrfs_check_node(root, &rec->parent_key, buf);
4477
4478         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4479                 if (repair)
4480                         status = try_to_fix_bad_block(root, buf, status);
4481                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4482                         ret = -EIO;
4483                         fprintf(stderr, "bad block %llu\n",
4484                                 (unsigned long long)buf->start);
4485                 } else {
4486                         /*
4487                          * Signal to callers we need to start the scan over
4488                          * again since we'll have cowed blocks.
4489                          */
4490                         ret = -EAGAIN;
4491                 }
4492         } else {
4493                 rec->content_checked = 1;
4494                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4495                         rec->owner_ref_checked = 1;
4496                 else {
4497                         ret = check_owner_ref(root, rec, buf);
4498                         if (!ret)
4499                                 rec->owner_ref_checked = 1;
4500                 }
4501         }
4502         if (!ret)
4503                 maybe_free_extent_rec(extent_cache, rec);
4504         return ret;
4505 }
4506
4507
4508 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4509                                                 u64 parent, u64 root)
4510 {
4511         struct rb_node *node;
4512         struct tree_backref *back = NULL;
4513         struct tree_backref match = {
4514                 .node = {
4515                         .is_data = 0,
4516                 },
4517         };
4518
4519         if (parent) {
4520                 match.parent = parent;
4521                 match.node.full_backref = 1;
4522         } else {
4523                 match.root = root;
4524         }
4525
4526         node = rb_search(&rec->backref_tree, &match.node.node,
4527                          (rb_compare_keys)compare_extent_backref, NULL);
4528         if (node)
4529                 back = to_tree_backref(rb_node_to_extent_backref(node));
4530
4531         return back;
4532 }
4533
4534 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4535                                                 u64 parent, u64 root)
4536 {
4537         struct tree_backref *ref = malloc(sizeof(*ref));
4538
4539         if (!ref)
4540                 return NULL;
4541         memset(&ref->node, 0, sizeof(ref->node));
4542         if (parent > 0) {
4543                 ref->parent = parent;
4544                 ref->node.full_backref = 1;
4545         } else {
4546                 ref->root = root;
4547                 ref->node.full_backref = 0;
4548         }
4549         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4550
4551         return ref;
4552 }
4553
4554 static struct data_backref *find_data_backref(struct extent_record *rec,
4555                                                 u64 parent, u64 root,
4556                                                 u64 owner, u64 offset,
4557                                                 int found_ref,
4558                                                 u64 disk_bytenr, u64 bytes)
4559 {
4560         struct rb_node *node;
4561         struct data_backref *back = NULL;
4562         struct data_backref match = {
4563                 .node = {
4564                         .is_data = 1,
4565                 },
4566                 .owner = owner,
4567                 .offset = offset,
4568                 .bytes = bytes,
4569                 .found_ref = found_ref,
4570                 .disk_bytenr = disk_bytenr,
4571         };
4572
4573         if (parent) {
4574                 match.parent = parent;
4575                 match.node.full_backref = 1;
4576         } else {
4577                 match.root = root;
4578         }
4579
4580         node = rb_search(&rec->backref_tree, &match.node.node,
4581                          (rb_compare_keys)compare_extent_backref, NULL);
4582         if (node)
4583                 back = to_data_backref(rb_node_to_extent_backref(node));
4584
4585         return back;
4586 }
4587
4588 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4589                                                 u64 parent, u64 root,
4590                                                 u64 owner, u64 offset,
4591                                                 u64 max_size)
4592 {
4593         struct data_backref *ref = malloc(sizeof(*ref));
4594
4595         if (!ref)
4596                 return NULL;
4597         memset(&ref->node, 0, sizeof(ref->node));
4598         ref->node.is_data = 1;
4599
4600         if (parent > 0) {
4601                 ref->parent = parent;
4602                 ref->owner = 0;
4603                 ref->offset = 0;
4604                 ref->node.full_backref = 1;
4605         } else {
4606                 ref->root = root;
4607                 ref->owner = owner;
4608                 ref->offset = offset;
4609                 ref->node.full_backref = 0;
4610         }
4611         ref->bytes = max_size;
4612         ref->found_ref = 0;
4613         ref->num_refs = 0;
4614         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4615         if (max_size > rec->max_size)
4616                 rec->max_size = max_size;
4617         return ref;
4618 }
4619
4620 /* Check if the type of extent matches with its chunk */
4621 static void check_extent_type(struct extent_record *rec)
4622 {
4623         struct btrfs_block_group_cache *bg_cache;
4624
4625         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4626         if (!bg_cache)
4627                 return;
4628
4629         /* data extent, check chunk directly*/
4630         if (!rec->metadata) {
4631                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4632                         rec->wrong_chunk_type = 1;
4633                 return;
4634         }
4635
4636         /* metadata extent, check the obvious case first */
4637         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4638                                  BTRFS_BLOCK_GROUP_METADATA))) {
4639                 rec->wrong_chunk_type = 1;
4640                 return;
4641         }
4642
4643         /*
4644          * Check SYSTEM extent, as it's also marked as metadata, we can only
4645          * make sure it's a SYSTEM extent by its backref
4646          */
4647         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4648                 struct extent_backref *node;
4649                 struct tree_backref *tback;
4650                 u64 bg_type;
4651
4652                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4653                 if (node->is_data) {
4654                         /* tree block shouldn't have data backref */
4655                         rec->wrong_chunk_type = 1;
4656                         return;
4657                 }
4658                 tback = container_of(node, struct tree_backref, node);
4659
4660                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4661                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4662                 else
4663                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4664                 if (!(bg_cache->flags & bg_type))
4665                         rec->wrong_chunk_type = 1;
4666         }
4667 }
4668
4669 /*
4670  * Allocate a new extent record, fill default values from @tmpl and insert int
4671  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4672  * the cache, otherwise it fails.
4673  */
4674 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4675                 struct extent_record *tmpl)
4676 {
4677         struct extent_record *rec;
4678         int ret = 0;
4679
4680         rec = malloc(sizeof(*rec));
4681         if (!rec)
4682                 return -ENOMEM;
4683         rec->start = tmpl->start;
4684         rec->max_size = tmpl->max_size;
4685         rec->nr = max(tmpl->nr, tmpl->max_size);
4686         rec->found_rec = tmpl->found_rec;
4687         rec->content_checked = tmpl->content_checked;
4688         rec->owner_ref_checked = tmpl->owner_ref_checked;
4689         rec->num_duplicates = 0;
4690         rec->metadata = tmpl->metadata;
4691         rec->flag_block_full_backref = FLAG_UNSET;
4692         rec->bad_full_backref = 0;
4693         rec->crossing_stripes = 0;
4694         rec->wrong_chunk_type = 0;
4695         rec->is_root = tmpl->is_root;
4696         rec->refs = tmpl->refs;
4697         rec->extent_item_refs = tmpl->extent_item_refs;
4698         rec->parent_generation = tmpl->parent_generation;
4699         INIT_LIST_HEAD(&rec->backrefs);
4700         INIT_LIST_HEAD(&rec->dups);
4701         INIT_LIST_HEAD(&rec->list);
4702         rec->backref_tree = RB_ROOT;
4703         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4704         rec->cache.start = tmpl->start;
4705         rec->cache.size = tmpl->nr;
4706         ret = insert_cache_extent(extent_cache, &rec->cache);
4707         BUG_ON(ret);
4708         bytes_used += rec->nr;
4709
4710         if (tmpl->metadata)
4711                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4712                                 global_info->tree_root->nodesize);
4713         check_extent_type(rec);
4714         return ret;
4715 }
4716
4717 /*
4718  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4719  * some are hints:
4720  * - refs              - if found, increase refs
4721  * - is_root           - if found, set
4722  * - content_checked   - if found, set
4723  * - owner_ref_checked - if found, set
4724  *
4725  * If not found, create a new one, initialize and insert.
4726  */
4727 static int add_extent_rec(struct cache_tree *extent_cache,
4728                 struct extent_record *tmpl)
4729 {
4730         struct extent_record *rec;
4731         struct cache_extent *cache;
4732         int ret = 0;
4733         int dup = 0;
4734
4735         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4736         if (cache) {
4737                 rec = container_of(cache, struct extent_record, cache);
4738                 if (tmpl->refs)
4739                         rec->refs++;
4740                 if (rec->nr == 1)
4741                         rec->nr = max(tmpl->nr, tmpl->max_size);
4742
4743                 /*
4744                  * We need to make sure to reset nr to whatever the extent
4745                  * record says was the real size, this way we can compare it to
4746                  * the backrefs.
4747                  */
4748                 if (tmpl->found_rec) {
4749                         if (tmpl->start != rec->start || rec->found_rec) {
4750                                 struct extent_record *tmp;
4751
4752                                 dup = 1;
4753                                 if (list_empty(&rec->list))
4754                                         list_add_tail(&rec->list,
4755                                                       &duplicate_extents);
4756
4757                                 /*
4758                                  * We have to do this song and dance in case we
4759                                  * find an extent record that falls inside of
4760                                  * our current extent record but does not have
4761                                  * the same objectid.
4762                                  */
4763                                 tmp = malloc(sizeof(*tmp));
4764                                 if (!tmp)
4765                                         return -ENOMEM;
4766                                 tmp->start = tmpl->start;
4767                                 tmp->max_size = tmpl->max_size;
4768                                 tmp->nr = tmpl->nr;
4769                                 tmp->found_rec = 1;
4770                                 tmp->metadata = tmpl->metadata;
4771                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4772                                 INIT_LIST_HEAD(&tmp->list);
4773                                 list_add_tail(&tmp->list, &rec->dups);
4774                                 rec->num_duplicates++;
4775                         } else {
4776                                 rec->nr = tmpl->nr;
4777                                 rec->found_rec = 1;
4778                         }
4779                 }
4780
4781                 if (tmpl->extent_item_refs && !dup) {
4782                         if (rec->extent_item_refs) {
4783                                 fprintf(stderr, "block %llu rec "
4784                                         "extent_item_refs %llu, passed %llu\n",
4785                                         (unsigned long long)tmpl->start,
4786                                         (unsigned long long)
4787                                                         rec->extent_item_refs,
4788                                         (unsigned long long)tmpl->extent_item_refs);
4789                         }
4790                         rec->extent_item_refs = tmpl->extent_item_refs;
4791                 }
4792                 if (tmpl->is_root)
4793                         rec->is_root = 1;
4794                 if (tmpl->content_checked)
4795                         rec->content_checked = 1;
4796                 if (tmpl->owner_ref_checked)
4797                         rec->owner_ref_checked = 1;
4798                 memcpy(&rec->parent_key, &tmpl->parent_key,
4799                                 sizeof(tmpl->parent_key));
4800                 if (tmpl->parent_generation)
4801                         rec->parent_generation = tmpl->parent_generation;
4802                 if (rec->max_size < tmpl->max_size)
4803                         rec->max_size = tmpl->max_size;
4804
4805                 /*
4806                  * A metadata extent can't cross stripe_len boundary, otherwise
4807                  * kernel scrub won't be able to handle it.
4808                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4809                  * it.
4810                  */
4811                 if (tmpl->metadata)
4812                         rec->crossing_stripes = check_crossing_stripes(
4813                                 rec->start, global_info->tree_root->nodesize);
4814                 check_extent_type(rec);
4815                 maybe_free_extent_rec(extent_cache, rec);
4816                 return ret;
4817         }
4818
4819         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4820
4821         return ret;
4822 }
4823
4824 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4825                             u64 parent, u64 root, int found_ref)
4826 {
4827         struct extent_record *rec;
4828         struct tree_backref *back;
4829         struct cache_extent *cache;
4830
4831         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4832         if (!cache) {
4833                 struct extent_record tmpl;
4834
4835                 memset(&tmpl, 0, sizeof(tmpl));
4836                 tmpl.start = bytenr;
4837                 tmpl.nr = 1;
4838                 tmpl.metadata = 1;
4839
4840                 add_extent_rec_nolookup(extent_cache, &tmpl);
4841
4842                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4843                 if (!cache)
4844                         abort();
4845         }
4846
4847         rec = container_of(cache, struct extent_record, cache);
4848         if (rec->start != bytenr) {
4849                 abort();
4850         }
4851
4852         back = find_tree_backref(rec, parent, root);
4853         if (!back) {
4854                 back = alloc_tree_backref(rec, parent, root);
4855                 BUG_ON(!back);
4856         }
4857
4858         if (found_ref) {
4859                 if (back->node.found_ref) {
4860                         fprintf(stderr, "Extent back ref already exists "
4861                                 "for %llu parent %llu root %llu \n",
4862                                 (unsigned long long)bytenr,
4863                                 (unsigned long long)parent,
4864                                 (unsigned long long)root);
4865                 }
4866                 back->node.found_ref = 1;
4867         } else {
4868                 if (back->node.found_extent_tree) {
4869                         fprintf(stderr, "Extent back ref already exists "
4870                                 "for %llu parent %llu root %llu \n",
4871                                 (unsigned long long)bytenr,
4872                                 (unsigned long long)parent,
4873                                 (unsigned long long)root);
4874                 }
4875                 back->node.found_extent_tree = 1;
4876         }
4877         check_extent_type(rec);
4878         maybe_free_extent_rec(extent_cache, rec);
4879         return 0;
4880 }
4881
4882 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4883                             u64 parent, u64 root, u64 owner, u64 offset,
4884                             u32 num_refs, int found_ref, u64 max_size)
4885 {
4886         struct extent_record *rec;
4887         struct data_backref *back;
4888         struct cache_extent *cache;
4889
4890         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4891         if (!cache) {
4892                 struct extent_record tmpl;
4893
4894                 memset(&tmpl, 0, sizeof(tmpl));
4895                 tmpl.start = bytenr;
4896                 tmpl.nr = 1;
4897                 tmpl.max_size = max_size;
4898
4899                 add_extent_rec_nolookup(extent_cache, &tmpl);
4900
4901                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4902                 if (!cache)
4903                         abort();
4904         }
4905
4906         rec = container_of(cache, struct extent_record, cache);
4907         if (rec->max_size < max_size)
4908                 rec->max_size = max_size;
4909
4910         /*
4911          * If found_ref is set then max_size is the real size and must match the
4912          * existing refs.  So if we have already found a ref then we need to
4913          * make sure that this ref matches the existing one, otherwise we need
4914          * to add a new backref so we can notice that the backrefs don't match
4915          * and we need to figure out who is telling the truth.  This is to
4916          * account for that awful fsync bug I introduced where we'd end up with
4917          * a btrfs_file_extent_item that would have its length include multiple
4918          * prealloc extents or point inside of a prealloc extent.
4919          */
4920         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4921                                  bytenr, max_size);
4922         if (!back) {
4923                 back = alloc_data_backref(rec, parent, root, owner, offset,
4924                                           max_size);
4925                 BUG_ON(!back);
4926         }
4927
4928         if (found_ref) {
4929                 BUG_ON(num_refs != 1);
4930                 if (back->node.found_ref)
4931                         BUG_ON(back->bytes != max_size);
4932                 back->node.found_ref = 1;
4933                 back->found_ref += 1;
4934                 back->bytes = max_size;
4935                 back->disk_bytenr = bytenr;
4936                 rec->refs += 1;
4937                 rec->content_checked = 1;
4938                 rec->owner_ref_checked = 1;
4939         } else {
4940                 if (back->node.found_extent_tree) {
4941                         fprintf(stderr, "Extent back ref already exists "
4942                                 "for %llu parent %llu root %llu "
4943                                 "owner %llu offset %llu num_refs %lu\n",
4944                                 (unsigned long long)bytenr,
4945                                 (unsigned long long)parent,
4946                                 (unsigned long long)root,
4947                                 (unsigned long long)owner,
4948                                 (unsigned long long)offset,
4949                                 (unsigned long)num_refs);
4950                 }
4951                 back->num_refs = num_refs;
4952                 back->node.found_extent_tree = 1;
4953         }
4954         maybe_free_extent_rec(extent_cache, rec);
4955         return 0;
4956 }
4957
4958 static int add_pending(struct cache_tree *pending,
4959                        struct cache_tree *seen, u64 bytenr, u32 size)
4960 {
4961         int ret;
4962         ret = add_cache_extent(seen, bytenr, size);
4963         if (ret)
4964                 return ret;
4965         add_cache_extent(pending, bytenr, size);
4966         return 0;
4967 }
4968
4969 static int pick_next_pending(struct cache_tree *pending,
4970                         struct cache_tree *reada,
4971                         struct cache_tree *nodes,
4972                         u64 last, struct block_info *bits, int bits_nr,
4973                         int *reada_bits)
4974 {
4975         unsigned long node_start = last;
4976         struct cache_extent *cache;
4977         int ret;
4978
4979         cache = search_cache_extent(reada, 0);
4980         if (cache) {
4981                 bits[0].start = cache->start;
4982                 bits[0].size = cache->size;
4983                 *reada_bits = 1;
4984                 return 1;
4985         }
4986         *reada_bits = 0;
4987         if (node_start > 32768)
4988                 node_start -= 32768;
4989
4990         cache = search_cache_extent(nodes, node_start);
4991         if (!cache)
4992                 cache = search_cache_extent(nodes, 0);
4993
4994         if (!cache) {
4995                  cache = search_cache_extent(pending, 0);
4996                  if (!cache)
4997                          return 0;
4998                  ret = 0;
4999                  do {
5000                          bits[ret].start = cache->start;
5001                          bits[ret].size = cache->size;
5002                          cache = next_cache_extent(cache);
5003                          ret++;
5004                  } while (cache && ret < bits_nr);
5005                  return ret;
5006         }
5007
5008         ret = 0;
5009         do {
5010                 bits[ret].start = cache->start;
5011                 bits[ret].size = cache->size;
5012                 cache = next_cache_extent(cache);
5013                 ret++;
5014         } while (cache && ret < bits_nr);
5015
5016         if (bits_nr - ret > 8) {
5017                 u64 lookup = bits[0].start + bits[0].size;
5018                 struct cache_extent *next;
5019                 next = search_cache_extent(pending, lookup);
5020                 while(next) {
5021                         if (next->start - lookup > 32768)
5022                                 break;
5023                         bits[ret].start = next->start;
5024                         bits[ret].size = next->size;
5025                         lookup = next->start + next->size;
5026                         ret++;
5027                         if (ret == bits_nr)
5028                                 break;
5029                         next = next_cache_extent(next);
5030                         if (!next)
5031                                 break;
5032                 }
5033         }
5034         return ret;
5035 }
5036
5037 static void free_chunk_record(struct cache_extent *cache)
5038 {
5039         struct chunk_record *rec;
5040
5041         rec = container_of(cache, struct chunk_record, cache);
5042         list_del_init(&rec->list);
5043         list_del_init(&rec->dextents);
5044         free(rec);
5045 }
5046
5047 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5048 {
5049         cache_tree_free_extents(chunk_cache, free_chunk_record);
5050 }
5051
5052 static void free_device_record(struct rb_node *node)
5053 {
5054         struct device_record *rec;
5055
5056         rec = container_of(node, struct device_record, node);
5057         free(rec);
5058 }
5059
5060 FREE_RB_BASED_TREE(device_cache, free_device_record);
5061
5062 int insert_block_group_record(struct block_group_tree *tree,
5063                               struct block_group_record *bg_rec)
5064 {
5065         int ret;
5066
5067         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5068         if (ret)
5069                 return ret;
5070
5071         list_add_tail(&bg_rec->list, &tree->block_groups);
5072         return 0;
5073 }
5074
5075 static void free_block_group_record(struct cache_extent *cache)
5076 {
5077         struct block_group_record *rec;
5078
5079         rec = container_of(cache, struct block_group_record, cache);
5080         list_del_init(&rec->list);
5081         free(rec);
5082 }
5083
5084 void free_block_group_tree(struct block_group_tree *tree)
5085 {
5086         cache_tree_free_extents(&tree->tree, free_block_group_record);
5087 }
5088
5089 int insert_device_extent_record(struct device_extent_tree *tree,
5090                                 struct device_extent_record *de_rec)
5091 {
5092         int ret;
5093
5094         /*
5095          * Device extent is a bit different from the other extents, because
5096          * the extents which belong to the different devices may have the
5097          * same start and size, so we need use the special extent cache
5098          * search/insert functions.
5099          */
5100         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5101         if (ret)
5102                 return ret;
5103
5104         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5105         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5106         return 0;
5107 }
5108
5109 static void free_device_extent_record(struct cache_extent *cache)
5110 {
5111         struct device_extent_record *rec;
5112
5113         rec = container_of(cache, struct device_extent_record, cache);
5114         if (!list_empty(&rec->chunk_list))
5115                 list_del_init(&rec->chunk_list);
5116         if (!list_empty(&rec->device_list))
5117                 list_del_init(&rec->device_list);
5118         free(rec);
5119 }
5120
5121 void free_device_extent_tree(struct device_extent_tree *tree)
5122 {
5123         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5124 }
5125
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.  A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree backref, anything else as a data backref carrying the item's ref
 * count.  In both cases key.objectid is the extent start and key.offset is
 * passed as the parent.  Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_key key;
        struct btrfs_extent_ref_v0 *ref0;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                add_data_backref(extent_cache, key.objectid, key.offset, 0,
                                 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
        else
                add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);

        return 0;
}
#endif
5144
5145 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5146                                             struct btrfs_key *key,
5147                                             int slot)
5148 {
5149         struct btrfs_chunk *ptr;
5150         struct chunk_record *rec;
5151         int num_stripes, i;
5152
5153         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5154         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5155
5156         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5157         if (!rec) {
5158                 fprintf(stderr, "memory allocation failed\n");
5159                 exit(-1);
5160         }
5161
5162         INIT_LIST_HEAD(&rec->list);
5163         INIT_LIST_HEAD(&rec->dextents);
5164         rec->bg_rec = NULL;
5165
5166         rec->cache.start = key->offset;
5167         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5168
5169         rec->generation = btrfs_header_generation(leaf);
5170
5171         rec->objectid = key->objectid;
5172         rec->type = key->type;
5173         rec->offset = key->offset;
5174
5175         rec->length = rec->cache.size;
5176         rec->owner = btrfs_chunk_owner(leaf, ptr);
5177         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5178         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5179         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5180         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5181         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5182         rec->num_stripes = num_stripes;
5183         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5184
5185         for (i = 0; i < rec->num_stripes; ++i) {
5186                 rec->stripes[i].devid =
5187                         btrfs_stripe_devid_nr(leaf, ptr, i);
5188                 rec->stripes[i].offset =
5189                         btrfs_stripe_offset_nr(leaf, ptr, i);
5190                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5191                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5192                                 BTRFS_UUID_SIZE);
5193         }
5194
5195         return rec;
5196 }
5197
5198 static int process_chunk_item(struct cache_tree *chunk_cache,
5199                               struct btrfs_key *key, struct extent_buffer *eb,
5200                               int slot)
5201 {
5202         struct chunk_record *rec;
5203         int ret = 0;
5204
5205         rec = btrfs_new_chunk_record(eb, key, slot);
5206         ret = insert_cache_extent(chunk_cache, &rec->cache);
5207         if (ret) {
5208                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5209                         rec->offset, rec->length);
5210                 free(rec);
5211         }
5212
5213         return ret;
5214 }
5215
5216 static int process_device_item(struct rb_root *dev_cache,
5217                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5218 {
5219         struct btrfs_dev_item *ptr;
5220         struct device_record *rec;
5221         int ret = 0;
5222
5223         ptr = btrfs_item_ptr(eb,
5224                 slot, struct btrfs_dev_item);
5225
5226         rec = malloc(sizeof(*rec));
5227         if (!rec) {
5228                 fprintf(stderr, "memory allocation failed\n");
5229                 return -ENOMEM;
5230         }
5231
5232         rec->devid = key->offset;
5233         rec->generation = btrfs_header_generation(eb);
5234
5235         rec->objectid = key->objectid;
5236         rec->type = key->type;
5237         rec->offset = key->offset;
5238
5239         rec->devid = btrfs_device_id(eb, ptr);
5240         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5241         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5242
5243         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5244         if (ret) {
5245                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5246                 free(rec);
5247         }
5248
5249         return ret;
5250 }
5251
5252 struct block_group_record *
5253 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5254                              int slot)
5255 {
5256         struct btrfs_block_group_item *ptr;
5257         struct block_group_record *rec;
5258
5259         rec = calloc(1, sizeof(*rec));
5260         if (!rec) {
5261                 fprintf(stderr, "memory allocation failed\n");
5262                 exit(-1);
5263         }
5264
5265         rec->cache.start = key->objectid;
5266         rec->cache.size = key->offset;
5267
5268         rec->generation = btrfs_header_generation(leaf);
5269
5270         rec->objectid = key->objectid;
5271         rec->type = key->type;
5272         rec->offset = key->offset;
5273
5274         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5275         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5276
5277         INIT_LIST_HEAD(&rec->list);
5278
5279         return rec;
5280 }
5281
5282 static int process_block_group_item(struct block_group_tree *block_group_cache,
5283                                     struct btrfs_key *key,
5284                                     struct extent_buffer *eb, int slot)
5285 {
5286         struct block_group_record *rec;
5287         int ret = 0;
5288
5289         rec = btrfs_new_block_group_record(eb, key, slot);
5290         ret = insert_block_group_record(block_group_cache, rec);
5291         if (ret) {
5292                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5293                         rec->objectid, rec->offset);
5294                 free(rec);
5295         }
5296
5297         return ret;
5298 }
5299
5300 struct device_extent_record *
5301 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5302                                struct btrfs_key *key, int slot)
5303 {
5304         struct device_extent_record *rec;
5305         struct btrfs_dev_extent *ptr;
5306
5307         rec = calloc(1, sizeof(*rec));
5308         if (!rec) {
5309                 fprintf(stderr, "memory allocation failed\n");
5310                 exit(-1);
5311         }
5312
5313         rec->cache.objectid = key->objectid;
5314         rec->cache.start = key->offset;
5315
5316         rec->generation = btrfs_header_generation(leaf);
5317
5318         rec->objectid = key->objectid;
5319         rec->type = key->type;
5320         rec->offset = key->offset;
5321
5322         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5323         rec->chunk_objecteid =
5324                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5325         rec->chunk_offset =
5326                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5327         rec->length = btrfs_dev_extent_length(leaf, ptr);
5328         rec->cache.size = rec->length;
5329
5330         INIT_LIST_HEAD(&rec->chunk_list);
5331         INIT_LIST_HEAD(&rec->device_list);
5332
5333         return rec;
5334 }
5335
5336 static int
5337 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5338                            struct btrfs_key *key, struct extent_buffer *eb,
5339                            int slot)
5340 {
5341         struct device_extent_record *rec;
5342         int ret;
5343
5344         rec = btrfs_new_device_extent_record(eb, key, slot);
5345         ret = insert_device_extent_record(dev_extent_cache, rec);
5346         if (ret) {
5347                 fprintf(stderr,
5348                         "Device extent[%llu, %llu, %llu] existed.\n",
5349                         rec->objectid, rec->offset, rec->length);
5350                 free(rec);
5351         }
5352
5353         return ret;
5354 }
5355
5356 static int process_extent_item(struct btrfs_root *root,
5357                                struct cache_tree *extent_cache,
5358                                struct extent_buffer *eb, int slot)
5359 {
5360         struct btrfs_extent_item *ei;
5361         struct btrfs_extent_inline_ref *iref;
5362         struct btrfs_extent_data_ref *dref;
5363         struct btrfs_shared_data_ref *sref;
5364         struct btrfs_key key;
5365         struct extent_record tmpl;
5366         unsigned long end;
5367         unsigned long ptr;
5368         int type;
5369         u32 item_size = btrfs_item_size_nr(eb, slot);
5370         u64 refs = 0;
5371         u64 offset;
5372         u64 num_bytes;
5373         int metadata = 0;
5374
5375         btrfs_item_key_to_cpu(eb, &key, slot);
5376
5377         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5378                 metadata = 1;
5379                 num_bytes = root->nodesize;
5380         } else {
5381                 num_bytes = key.offset;
5382         }
5383
5384         if (item_size < sizeof(*ei)) {
5385 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5386                 struct btrfs_extent_item_v0 *ei0;
5387                 BUG_ON(item_size != sizeof(*ei0));
5388                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5389                 refs = btrfs_extent_refs_v0(eb, ei0);
5390 #else
5391                 BUG();
5392 #endif
5393                 memset(&tmpl, 0, sizeof(tmpl));
5394                 tmpl.start = key.objectid;
5395                 tmpl.nr = num_bytes;
5396                 tmpl.extent_item_refs = refs;
5397                 tmpl.metadata = metadata;
5398                 tmpl.found_rec = 1;
5399                 tmpl.max_size = num_bytes;
5400
5401                 return add_extent_rec(extent_cache, &tmpl);
5402         }
5403
5404         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5405         refs = btrfs_extent_refs(eb, ei);
5406         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5407                 metadata = 1;
5408         else
5409                 metadata = 0;
5410
5411         memset(&tmpl, 0, sizeof(tmpl));
5412         tmpl.start = key.objectid;
5413         tmpl.nr = num_bytes;
5414         tmpl.extent_item_refs = refs;
5415         tmpl.metadata = metadata;
5416         tmpl.found_rec = 1;
5417         tmpl.max_size = num_bytes;
5418         add_extent_rec(extent_cache, &tmpl);
5419
5420         ptr = (unsigned long)(ei + 1);
5421         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5422             key.type == BTRFS_EXTENT_ITEM_KEY)
5423                 ptr += sizeof(struct btrfs_tree_block_info);
5424
5425         end = (unsigned long)ei + item_size;
5426         while (ptr < end) {
5427                 iref = (struct btrfs_extent_inline_ref *)ptr;
5428                 type = btrfs_extent_inline_ref_type(eb, iref);
5429                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5430                 switch (type) {
5431                 case BTRFS_TREE_BLOCK_REF_KEY:
5432                         add_tree_backref(extent_cache, key.objectid,
5433                                          0, offset, 0);
5434                         break;
5435                 case BTRFS_SHARED_BLOCK_REF_KEY:
5436                         add_tree_backref(extent_cache, key.objectid,
5437                                          offset, 0, 0);
5438                         break;
5439                 case BTRFS_EXTENT_DATA_REF_KEY:
5440                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5441                         add_data_backref(extent_cache, key.objectid, 0,
5442                                         btrfs_extent_data_ref_root(eb, dref),
5443                                         btrfs_extent_data_ref_objectid(eb,
5444                                                                        dref),
5445                                         btrfs_extent_data_ref_offset(eb, dref),
5446                                         btrfs_extent_data_ref_count(eb, dref),
5447                                         0, num_bytes);
5448                         break;
5449                 case BTRFS_SHARED_DATA_REF_KEY:
5450                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5451                         add_data_backref(extent_cache, key.objectid, offset,
5452                                         0, 0, 0,
5453                                         btrfs_shared_data_ref_count(eb, sref),
5454                                         0, num_bytes);
5455                         break;
5456                 default:
5457                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5458                                 key.objectid, key.type, num_bytes);
5459                         goto out;
5460                 }
5461                 ptr += btrfs_extent_inline_ref_size(type);
5462         }
5463         WARN_ON(ptr > end);
5464 out:
5465         return 0;
5466 }
5467
5468 static int check_cache_range(struct btrfs_root *root,
5469                              struct btrfs_block_group_cache *cache,
5470                              u64 offset, u64 bytes)
5471 {
5472         struct btrfs_free_space *entry;
5473         u64 *logical;
5474         u64 bytenr;
5475         int stripe_len;
5476         int i, nr, ret;
5477
5478         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5479                 bytenr = btrfs_sb_offset(i);
5480                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5481                                        cache->key.objectid, bytenr, 0,
5482                                        &logical, &nr, &stripe_len);
5483                 if (ret)
5484                         return ret;
5485
5486                 while (nr--) {
5487                         if (logical[nr] + stripe_len <= offset)
5488                                 continue;
5489                         if (offset + bytes <= logical[nr])
5490                                 continue;
5491                         if (logical[nr] == offset) {
5492                                 if (stripe_len >= bytes) {
5493                                         kfree(logical);
5494                                         return 0;
5495                                 }
5496                                 bytes -= stripe_len;
5497                                 offset += stripe_len;
5498                         } else if (logical[nr] < offset) {
5499                                 if (logical[nr] + stripe_len >=
5500                                     offset + bytes) {
5501                                         kfree(logical);
5502                                         return 0;
5503                                 }
5504                                 bytes = (offset + bytes) -
5505                                         (logical[nr] + stripe_len);
5506                                 offset = logical[nr] + stripe_len;
5507                         } else {
5508                                 /*
5509                                  * Could be tricky, the super may land in the
5510                                  * middle of the area we're checking.  First
5511                                  * check the easiest case, it's at the end.
5512                                  */
5513                                 if (logical[nr] + stripe_len >=
5514                                     bytes + offset) {
5515                                         bytes = logical[nr] - offset;
5516                                         continue;
5517                                 }
5518
5519                                 /* Check the left side */
5520                                 ret = check_cache_range(root, cache,
5521                                                         offset,
5522                                                         logical[nr] - offset);
5523                                 if (ret) {
5524                                         kfree(logical);
5525                                         return ret;
5526                                 }
5527
5528                                 /* Now we continue with the right side */
5529                                 bytes = (offset + bytes) -
5530                                         (logical[nr] + stripe_len);
5531                                 offset = logical[nr] + stripe_len;
5532                         }
5533                 }
5534
5535                 kfree(logical);
5536         }
5537
5538         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5539         if (!entry) {
5540                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5541                         offset, offset+bytes);
5542                 return -EINVAL;
5543         }
5544
5545         if (entry->offset != offset) {
5546                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5547                         entry->offset);
5548                 return -EINVAL;
5549         }
5550
5551         if (entry->bytes != bytes) {
5552                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5553                         bytes, entry->bytes, offset);
5554                 return -EINVAL;
5555         }
5556
5557         unlink_free_space(cache->free_space_ctl, entry);
5558         free(entry);
5559         return 0;
5560 }
5561
5562 static int verify_space_cache(struct btrfs_root *root,
5563                               struct btrfs_block_group_cache *cache)
5564 {
5565         struct btrfs_path *path;
5566         struct extent_buffer *leaf;
5567         struct btrfs_key key;
5568         u64 last;
5569         int ret = 0;
5570
5571         path = btrfs_alloc_path();
5572         if (!path)
5573                 return -ENOMEM;
5574
5575         root = root->fs_info->extent_root;
5576
5577         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5578
5579         key.objectid = last;
5580         key.offset = 0;
5581         key.type = BTRFS_EXTENT_ITEM_KEY;
5582
5583         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5584         if (ret < 0)
5585                 goto out;
5586         ret = 0;
5587         while (1) {
5588                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5589                         ret = btrfs_next_leaf(root, path);
5590                         if (ret < 0)
5591                                 goto out;
5592                         if (ret > 0) {
5593                                 ret = 0;
5594                                 break;
5595                         }
5596                 }
5597                 leaf = path->nodes[0];
5598                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5599                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5600                         break;
5601                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5602                     key.type != BTRFS_METADATA_ITEM_KEY) {
5603                         path->slots[0]++;
5604                         continue;
5605                 }
5606
5607                 if (last == key.objectid) {
5608                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5609                                 last = key.objectid + key.offset;
5610                         else
5611                                 last = key.objectid + root->nodesize;
5612                         path->slots[0]++;
5613                         continue;
5614                 }
5615
5616                 ret = check_cache_range(root, cache, last,
5617                                         key.objectid - last);
5618                 if (ret)
5619                         break;
5620                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5621                         last = key.objectid + key.offset;
5622                 else
5623                         last = key.objectid + root->nodesize;
5624                 path->slots[0]++;
5625         }
5626
5627         if (last < cache->key.objectid + cache->key.offset)
5628                 ret = check_cache_range(root, cache, last,
5629                                         cache->key.objectid +
5630                                         cache->key.offset - last);
5631
5632 out:
5633         btrfs_free_path(path);
5634
5635         if (!ret &&
5636             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5637                 fprintf(stderr, "There are still entries left in the space "
5638                         "cache\n");
5639                 ret = -EINVAL;
5640         }
5641
5642         return ret;
5643 }
5644
5645 static int check_space_cache(struct btrfs_root *root)
5646 {
5647         struct btrfs_block_group_cache *cache;
5648         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5649         int ret;
5650         int error = 0;
5651
5652         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5653             btrfs_super_generation(root->fs_info->super_copy) !=
5654             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5655                 printf("cache and super generation don't match, space cache "
5656                        "will be invalidated\n");
5657                 return 0;
5658         }
5659
5660         if (ctx.progress_enabled) {
5661                 ctx.tp = TASK_FREE_SPACE;
5662                 task_start(ctx.info);
5663         }
5664
5665         while (1) {
5666                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5667                 if (!cache)
5668                         break;
5669
5670                 start = cache->key.objectid + cache->key.offset;
5671                 if (!cache->free_space_ctl) {
5672                         if (btrfs_init_free_space_ctl(cache,
5673                                                       root->sectorsize)) {
5674                                 ret = -ENOMEM;
5675                                 break;
5676                         }
5677                 } else {
5678                         btrfs_remove_free_space_cache(cache);
5679                 }
5680
5681                 if (btrfs_fs_compat_ro(root->fs_info,
5682                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5683                         ret = exclude_super_stripes(root, cache);
5684                         if (ret) {
5685                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5686                                         strerror(-ret));
5687                                 error++;
5688                                 continue;
5689                         }
5690                         ret = load_free_space_tree(root->fs_info, cache);
5691                         free_excluded_extents(root, cache);
5692                         if (ret < 0) {
5693                                 fprintf(stderr, "could not load free space tree: %s\n",
5694                                         strerror(-ret));
5695                                 error++;
5696                                 continue;
5697                         }
5698                         error += ret;
5699                 } else {
5700                         ret = load_free_space_cache(root->fs_info, cache);
5701                         if (!ret)
5702                                 continue;
5703                 }
5704
5705                 ret = verify_space_cache(root, cache);
5706                 if (ret) {
5707                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5708                                 cache->key.objectid);
5709                         error++;
5710                 }
5711         }
5712
5713         task_stop(ctx.info);
5714
5715         return error ? -EINVAL : 0;
5716 }
5717
5718 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5719                         u64 num_bytes, unsigned long leaf_offset,
5720                         struct extent_buffer *eb) {
5721
5722         u64 offset = 0;
5723         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5724         char *data;
5725         unsigned long csum_offset;
5726         u32 csum;
5727         u32 csum_expected;
5728         u64 read_len;
5729         u64 data_checked = 0;
5730         u64 tmp;
5731         int ret = 0;
5732         int mirror;
5733         int num_copies;
5734
5735         if (num_bytes % root->sectorsize)
5736                 return -EINVAL;
5737
5738         data = malloc(num_bytes);
5739         if (!data)
5740                 return -ENOMEM;
5741
5742         while (offset < num_bytes) {
5743                 mirror = 0;
5744 again:
5745                 read_len = num_bytes - offset;
5746                 /* read as much space once a time */
5747                 ret = read_extent_data(root, data + offset,
5748                                 bytenr + offset, &read_len, mirror);
5749                 if (ret)
5750                         goto out;
5751                 data_checked = 0;
5752                 /* verify every 4k data's checksum */
5753                 while (data_checked < read_len) {
5754                         csum = ~(u32)0;
5755                         tmp = offset + data_checked;
5756
5757                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5758                                                csum, root->sectorsize);
5759                         btrfs_csum_final(csum, (char *)&csum);
5760
5761                         csum_offset = leaf_offset +
5762                                  tmp / root->sectorsize * csum_size;
5763                         read_extent_buffer(eb, (char *)&csum_expected,
5764                                            csum_offset, csum_size);
5765                         /* try another mirror */
5766                         if (csum != csum_expected) {
5767                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5768                                                 mirror, bytenr + tmp,
5769                                                 csum, csum_expected);
5770                                 num_copies = btrfs_num_copies(
5771                                                 &root->fs_info->mapping_tree,
5772                                                 bytenr, num_bytes);
5773                                 if (mirror < num_copies - 1) {
5774                                         mirror += 1;
5775                                         goto again;
5776                                 }
5777                         }
5778                         data_checked += root->sectorsize;
5779                 }
5780                 offset += read_len;
5781         }
5782 out:
5783         free(data);
5784         return ret;
5785 }
5786
5787 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5788                                u64 num_bytes)
5789 {
5790         struct btrfs_path *path;
5791         struct extent_buffer *leaf;
5792         struct btrfs_key key;
5793         int ret;
5794
5795         path = btrfs_alloc_path();
5796         if (!path) {
5797                 fprintf(stderr, "Error allocating path\n");
5798                 return -ENOMEM;
5799         }
5800
5801         key.objectid = bytenr;
5802         key.type = BTRFS_EXTENT_ITEM_KEY;
5803         key.offset = (u64)-1;
5804
5805 again:
5806         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5807                                 0, 0);
5808         if (ret < 0) {
5809                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5810                 btrfs_free_path(path);
5811                 return ret;
5812         } else if (ret) {
5813                 if (path->slots[0] > 0) {
5814                         path->slots[0]--;
5815                 } else {
5816                         ret = btrfs_prev_leaf(root, path);
5817                         if (ret < 0) {
5818                                 goto out;
5819                         } else if (ret > 0) {
5820                                 ret = 0;
5821                                 goto out;
5822                         }
5823                 }
5824         }
5825
5826         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5827
5828         /*
5829          * Block group items come before extent items if they have the same
5830          * bytenr, so walk back one more just in case.  Dear future traveller,
5831          * first congrats on mastering time travel.  Now if it's not too much
5832          * trouble could you go back to 2006 and tell Chris to make the
5833          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5834          * EXTENT_ITEM_KEY please?
5835          */
5836         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5837                 if (path->slots[0] > 0) {
5838                         path->slots[0]--;
5839                 } else {
5840                         ret = btrfs_prev_leaf(root, path);
5841                         if (ret < 0) {
5842                                 goto out;
5843                         } else if (ret > 0) {
5844                                 ret = 0;
5845                                 goto out;
5846                         }
5847                 }
5848                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5849         }
5850
5851         while (num_bytes) {
5852                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5853                         ret = btrfs_next_leaf(root, path);
5854                         if (ret < 0) {
5855                                 fprintf(stderr, "Error going to next leaf "
5856                                         "%d\n", ret);
5857                                 btrfs_free_path(path);
5858                                 return ret;
5859                         } else if (ret) {
5860                                 break;
5861                         }
5862                 }
5863                 leaf = path->nodes[0];
5864                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5865                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5866                         path->slots[0]++;
5867                         continue;
5868                 }
5869                 if (key.objectid + key.offset < bytenr) {
5870                         path->slots[0]++;
5871                         continue;
5872                 }
5873                 if (key.objectid > bytenr + num_bytes)
5874                         break;
5875
5876                 if (key.objectid == bytenr) {
5877                         if (key.offset >= num_bytes) {
5878                                 num_bytes = 0;
5879                                 break;
5880                         }
5881                         num_bytes -= key.offset;
5882                         bytenr += key.offset;
5883                 } else if (key.objectid < bytenr) {
5884                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5885                                 num_bytes = 0;
5886                                 break;
5887                         }
5888                         num_bytes = (bytenr + num_bytes) -
5889                                 (key.objectid + key.offset);
5890                         bytenr = key.objectid + key.offset;
5891                 } else {
5892                         if (key.objectid + key.offset < bytenr + num_bytes) {
5893                                 u64 new_start = key.objectid + key.offset;
5894                                 u64 new_bytes = bytenr + num_bytes - new_start;
5895
5896                                 /*
5897                                  * Weird case, the extent is in the middle of
5898                                  * our range, we'll have to search one side
5899                                  * and then the other.  Not sure if this happens
5900                                  * in real life, but no harm in coding it up
5901                                  * anyway just in case.
5902                                  */
5903                                 btrfs_release_path(path);
5904                                 ret = check_extent_exists(root, new_start,
5905                                                           new_bytes);
5906                                 if (ret) {
5907                                         fprintf(stderr, "Right section didn't "
5908                                                 "have a record\n");
5909                                         break;
5910                                 }
5911                                 num_bytes = key.objectid - bytenr;
5912                                 goto again;
5913                         }
5914                         num_bytes = key.objectid - bytenr;
5915                 }
5916                 path->slots[0]++;
5917         }
5918         ret = 0;
5919
5920 out:
5921         if (num_bytes && !ret) {
5922                 fprintf(stderr, "There are no extents for csum range "
5923                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5924                 ret = 1;
5925         }
5926
5927         btrfs_free_path(path);
5928         return ret;
5929 }
5930
5931 static int check_csums(struct btrfs_root *root)
5932 {
5933         struct btrfs_path *path;
5934         struct extent_buffer *leaf;
5935         struct btrfs_key key;
5936         u64 offset = 0, num_bytes = 0;
5937         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5938         int errors = 0;
5939         int ret;
5940         u64 data_len;
5941         unsigned long leaf_offset;
5942
5943         root = root->fs_info->csum_root;
5944         if (!extent_buffer_uptodate(root->node)) {
5945                 fprintf(stderr, "No valid csum tree found\n");
5946                 return -ENOENT;
5947         }
5948
5949         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5950         key.type = BTRFS_EXTENT_CSUM_KEY;
5951         key.offset = 0;
5952
5953         path = btrfs_alloc_path();
5954         if (!path)
5955                 return -ENOMEM;
5956
5957         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5958         if (ret < 0) {
5959                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5960                 btrfs_free_path(path);
5961                 return ret;
5962         }
5963
5964         if (ret > 0 && path->slots[0])
5965                 path->slots[0]--;
5966         ret = 0;
5967
5968         while (1) {
5969                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5970                         ret = btrfs_next_leaf(root, path);
5971                         if (ret < 0) {
5972                                 fprintf(stderr, "Error going to next leaf "
5973                                         "%d\n", ret);
5974                                 break;
5975                         }
5976                         if (ret)
5977                                 break;
5978                 }
5979                 leaf = path->nodes[0];
5980
5981                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5982                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5983                         path->slots[0]++;
5984                         continue;
5985                 }
5986
5987                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5988                               csum_size) * root->sectorsize;
5989                 if (!check_data_csum)
5990                         goto skip_csum_check;
5991                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5992                 ret = check_extent_csums(root, key.offset, data_len,
5993                                          leaf_offset, leaf);
5994                 if (ret)
5995                         break;
5996 skip_csum_check:
5997                 if (!num_bytes) {
5998                         offset = key.offset;
5999                 } else if (key.offset != offset + num_bytes) {
6000                         ret = check_extent_exists(root, offset, num_bytes);
6001                         if (ret) {
6002                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6003                                         "there is no extent record\n",
6004                                         offset, offset+num_bytes);
6005                                 errors++;
6006                         }
6007                         offset = key.offset;
6008                         num_bytes = 0;
6009                 }
6010                 num_bytes += data_len;
6011                 path->slots[0]++;
6012         }
6013
6014         btrfs_free_path(path);
6015         return errors;
6016 }
6017
6018 static int is_dropped_key(struct btrfs_key *key,
6019                           struct btrfs_key *drop_key) {
6020         if (key->objectid < drop_key->objectid)
6021                 return 1;
6022         else if (key->objectid == drop_key->objectid) {
6023                 if (key->type < drop_key->type)
6024                         return 1;
6025                 else if (key->type == drop_key->type) {
6026                         if (key->offset < drop_key->offset)
6027                                 return 1;
6028                 }
6029         }
6030         return 0;
6031 }
6032
6033 /*
6034  * Here are the rules for FULL_BACKREF.
6035  *
6036  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6037  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6038  *      FULL_BACKREF set.
6039  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6040  *    if it happened after the relocation occurred since we'll have dropped the
6041  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6042  *    have no real way to know for sure.
6043  *
6044  * We process the blocks one root at a time, and we start from the lowest root
6045  * objectid and go to the highest.  So we can just lookup the owner backref for
6046  * the record and if we don't find it then we know it doesn't exist and we have
6047  * a FULL BACKREF.
6048  *
6049  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6050  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6051  * be set or not and then we can check later once we've gathered all the refs.
6052  */
6053 static int calc_extent_flag(struct btrfs_root *root,
6054                            struct cache_tree *extent_cache,
6055                            struct extent_buffer *buf,
6056                            struct root_item_record *ri,
6057                            u64 *flags)
6058 {
6059         struct extent_record *rec;
6060         struct cache_extent *cache;
6061         struct tree_backref *tback;
6062         u64 owner = 0;
6063
6064         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6065         /* we have added this extent before */
6066         BUG_ON(!cache);
6067         rec = container_of(cache, struct extent_record, cache);
6068
6069         /*
6070          * Except file/reloc tree, we can not have
6071          * FULL BACKREF MODE
6072          */
6073         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6074                 goto normal;
6075         /*
6076          * root node
6077          */
6078         if (buf->start == ri->bytenr)
6079                 goto normal;
6080
6081         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6082                 goto full_backref;
6083
6084         owner = btrfs_header_owner(buf);
6085         if (owner == ri->objectid)
6086                 goto normal;
6087
6088         tback = find_tree_backref(rec, 0, owner);
6089         if (!tback)
6090                 goto full_backref;
6091 normal:
6092         *flags = 0;
6093         if (rec->flag_block_full_backref != FLAG_UNSET &&
6094             rec->flag_block_full_backref != 0)
6095                 rec->bad_full_backref = 1;
6096         return 0;
6097 full_backref:
6098         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6099         if (rec->flag_block_full_backref != FLAG_UNSET &&
6100             rec->flag_block_full_backref != 1)
6101                 rec->bad_full_backref = 1;
6102         return 0;
6103 }
6104
6105 static int run_next_block(struct btrfs_root *root,
6106                           struct block_info *bits,
6107                           int bits_nr,
6108                           u64 *last,
6109                           struct cache_tree *pending,
6110                           struct cache_tree *seen,
6111                           struct cache_tree *reada,
6112                           struct cache_tree *nodes,
6113                           struct cache_tree *extent_cache,
6114                           struct cache_tree *chunk_cache,
6115                           struct rb_root *dev_cache,
6116                           struct block_group_tree *block_group_cache,
6117                           struct device_extent_tree *dev_extent_cache,
6118                           struct root_item_record *ri)
6119 {
6120         struct extent_buffer *buf;
6121         struct extent_record *rec = NULL;
6122         u64 bytenr;
6123         u32 size;
6124         u64 parent;
6125         u64 owner;
6126         u64 flags;
6127         u64 ptr;
6128         u64 gen = 0;
6129         int ret = 0;
6130         int i;
6131         int nritems;
6132         struct btrfs_key key;
6133         struct cache_extent *cache;
6134         int reada_bits;
6135
6136         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6137                                     bits_nr, &reada_bits);
6138         if (nritems == 0)
6139                 return 1;
6140
6141         if (!reada_bits) {
6142                 for(i = 0; i < nritems; i++) {
6143                         ret = add_cache_extent(reada, bits[i].start,
6144                                                bits[i].size);
6145                         if (ret == -EEXIST)
6146                                 continue;
6147
6148                         /* fixme, get the parent transid */
6149                         readahead_tree_block(root, bits[i].start,
6150                                              bits[i].size, 0);
6151                 }
6152         }
6153         *last = bits[0].start;
6154         bytenr = bits[0].start;
6155         size = bits[0].size;
6156
6157         cache = lookup_cache_extent(pending, bytenr, size);
6158         if (cache) {
6159                 remove_cache_extent(pending, cache);
6160                 free(cache);
6161         }
6162         cache = lookup_cache_extent(reada, bytenr, size);
6163         if (cache) {
6164                 remove_cache_extent(reada, cache);
6165                 free(cache);
6166         }
6167         cache = lookup_cache_extent(nodes, bytenr, size);
6168         if (cache) {
6169                 remove_cache_extent(nodes, cache);
6170                 free(cache);
6171         }
6172         cache = lookup_cache_extent(extent_cache, bytenr, size);
6173         if (cache) {
6174                 rec = container_of(cache, struct extent_record, cache);
6175                 gen = rec->parent_generation;
6176         }
6177
6178         /* fixme, get the real parent transid */
6179         buf = read_tree_block(root, bytenr, size, gen);
6180         if (!extent_buffer_uptodate(buf)) {
6181                 record_bad_block_io(root->fs_info,
6182                                     extent_cache, bytenr, size);
6183                 goto out;
6184         }
6185
6186         nritems = btrfs_header_nritems(buf);
6187
6188         flags = 0;
6189         if (!init_extent_tree) {
6190                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6191                                        btrfs_header_level(buf), 1, NULL,
6192                                        &flags);
6193                 if (ret < 0) {
6194                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6195                         if (ret < 0) {
6196                                 fprintf(stderr, "Couldn't calc extent flags\n");
6197                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6198                         }
6199                 }
6200         } else {
6201                 flags = 0;
6202                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6203                 if (ret < 0) {
6204                         fprintf(stderr, "Couldn't calc extent flags\n");
6205                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6206                 }
6207         }
6208
6209         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6210                 if (ri != NULL &&
6211                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6212                     ri->objectid == btrfs_header_owner(buf)) {
6213                         /*
6214                          * Ok we got to this block from it's original owner and
6215                          * we have FULL_BACKREF set.  Relocation can leave
6216                          * converted blocks over so this is altogether possible,
6217                          * however it's not possible if the generation > the
6218                          * last snapshot, so check for this case.
6219                          */
6220                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6221                             btrfs_header_generation(buf) > ri->last_snapshot) {
6222                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6223                                 rec->bad_full_backref = 1;
6224                         }
6225                 }
6226         } else {
6227                 if (ri != NULL &&
6228                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6229                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6230                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6231                         rec->bad_full_backref = 1;
6232                 }
6233         }
6234
6235         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6236                 rec->flag_block_full_backref = 1;
6237                 parent = bytenr;
6238                 owner = 0;
6239         } else {
6240                 rec->flag_block_full_backref = 0;
6241                 parent = 0;
6242                 owner = btrfs_header_owner(buf);
6243         }
6244
6245         ret = check_block(root, extent_cache, buf, flags);
6246         if (ret)
6247                 goto out;
6248
6249         if (btrfs_is_leaf(buf)) {
6250                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6251                 for (i = 0; i < nritems; i++) {
6252                         struct btrfs_file_extent_item *fi;
6253                         btrfs_item_key_to_cpu(buf, &key, i);
6254                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6255                                 process_extent_item(root, extent_cache, buf,
6256                                                     i);
6257                                 continue;
6258                         }
6259                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6260                                 process_extent_item(root, extent_cache, buf,
6261                                                     i);
6262                                 continue;
6263                         }
6264                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6265                                 total_csum_bytes +=
6266                                         btrfs_item_size_nr(buf, i);
6267                                 continue;
6268                         }
6269                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6270                                 process_chunk_item(chunk_cache, &key, buf, i);
6271                                 continue;
6272                         }
6273                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6274                                 process_device_item(dev_cache, &key, buf, i);
6275                                 continue;
6276                         }
6277                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6278                                 process_block_group_item(block_group_cache,
6279                                         &key, buf, i);
6280                                 continue;
6281                         }
6282                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6283                                 process_device_extent_item(dev_extent_cache,
6284                                         &key, buf, i);
6285                                 continue;
6286
6287                         }
6288                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6289 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6290                                 process_extent_ref_v0(extent_cache, buf, i);
6291 #else
6292                                 BUG();
6293 #endif
6294                                 continue;
6295                         }
6296
6297                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6298                                 add_tree_backref(extent_cache, key.objectid, 0,
6299                                                  key.offset, 0);
6300                                 continue;
6301                         }
6302                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6303                                 add_tree_backref(extent_cache, key.objectid,
6304                                                  key.offset, 0, 0);
6305                                 continue;
6306                         }
6307                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6308                                 struct btrfs_extent_data_ref *ref;
6309                                 ref = btrfs_item_ptr(buf, i,
6310                                                 struct btrfs_extent_data_ref);
6311                                 add_data_backref(extent_cache,
6312                                         key.objectid, 0,
6313                                         btrfs_extent_data_ref_root(buf, ref),
6314                                         btrfs_extent_data_ref_objectid(buf,
6315                                                                        ref),
6316                                         btrfs_extent_data_ref_offset(buf, ref),
6317                                         btrfs_extent_data_ref_count(buf, ref),
6318                                         0, root->sectorsize);
6319                                 continue;
6320                         }
6321                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6322                                 struct btrfs_shared_data_ref *ref;
6323                                 ref = btrfs_item_ptr(buf, i,
6324                                                 struct btrfs_shared_data_ref);
6325                                 add_data_backref(extent_cache,
6326                                         key.objectid, key.offset, 0, 0, 0,
6327                                         btrfs_shared_data_ref_count(buf, ref),
6328                                         0, root->sectorsize);
6329                                 continue;
6330                         }
6331                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6332                                 struct bad_item *bad;
6333
6334                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6335                                         continue;
6336                                 if (!owner)
6337                                         continue;
6338                                 bad = malloc(sizeof(struct bad_item));
6339                                 if (!bad)
6340                                         continue;
6341                                 INIT_LIST_HEAD(&bad->list);
6342                                 memcpy(&bad->key, &key,
6343                                        sizeof(struct btrfs_key));
6344                                 bad->root_id = owner;
6345                                 list_add_tail(&bad->list, &delete_items);
6346                                 continue;
6347                         }
6348                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6349                                 continue;
6350                         fi = btrfs_item_ptr(buf, i,
6351                                             struct btrfs_file_extent_item);
6352                         if (btrfs_file_extent_type(buf, fi) ==
6353                             BTRFS_FILE_EXTENT_INLINE)
6354                                 continue;
6355                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6356                                 continue;
6357
6358                         data_bytes_allocated +=
6359                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6360                         if (data_bytes_allocated < root->sectorsize) {
6361                                 abort();
6362                         }
6363                         data_bytes_referenced +=
6364                                 btrfs_file_extent_num_bytes(buf, fi);
6365                         add_data_backref(extent_cache,
6366                                 btrfs_file_extent_disk_bytenr(buf, fi),
6367                                 parent, owner, key.objectid, key.offset -
6368                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6369                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6370                 }
6371         } else {
6372                 int level;
6373                 struct btrfs_key first_key;
6374
6375                 first_key.objectid = 0;
6376
6377                 if (nritems > 0)
6378                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6379                 level = btrfs_header_level(buf);
6380                 for (i = 0; i < nritems; i++) {
6381                         struct extent_record tmpl;
6382
6383                         ptr = btrfs_node_blockptr(buf, i);
6384                         size = root->nodesize;
6385                         btrfs_node_key_to_cpu(buf, &key, i);
6386                         if (ri != NULL) {
6387                                 if ((level == ri->drop_level)
6388                                     && is_dropped_key(&key, &ri->drop_key)) {
6389                                         continue;
6390                                 }
6391                         }
6392
6393                         memset(&tmpl, 0, sizeof(tmpl));
6394                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6395                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6396                         tmpl.start = ptr;
6397                         tmpl.nr = size;
6398                         tmpl.refs = 1;
6399                         tmpl.metadata = 1;
6400                         tmpl.max_size = size;
6401                         ret = add_extent_rec(extent_cache, &tmpl);
6402                         BUG_ON(ret);
6403
6404                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6405
6406                         if (level > 1) {
6407                                 add_pending(nodes, seen, ptr, size);
6408                         } else {
6409                                 add_pending(pending, seen, ptr, size);
6410                         }
6411                 }
6412                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6413                                       nritems) * sizeof(struct btrfs_key_ptr);
6414         }
6415         total_btree_bytes += buf->len;
6416         if (fs_root_objectid(btrfs_header_owner(buf)))
6417                 total_fs_tree_bytes += buf->len;
6418         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6419                 total_extent_tree_bytes += buf->len;
6420         if (!found_old_backref &&
6421             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6422             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6423             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6424                 found_old_backref = 1;
6425 out:
6426         free_extent_buffer(buf);
6427         return ret;
6428 }
6429
6430 static int add_root_to_pending(struct extent_buffer *buf,
6431                                struct cache_tree *extent_cache,
6432                                struct cache_tree *pending,
6433                                struct cache_tree *seen,
6434                                struct cache_tree *nodes,
6435                                u64 objectid)
6436 {
6437         struct extent_record tmpl;
6438
6439         if (btrfs_header_level(buf) > 0)
6440                 add_pending(nodes, seen, buf->start, buf->len);
6441         else
6442                 add_pending(pending, seen, buf->start, buf->len);
6443
6444         memset(&tmpl, 0, sizeof(tmpl));
6445         tmpl.start = buf->start;
6446         tmpl.nr = buf->len;
6447         tmpl.is_root = 1;
6448         tmpl.refs = 1;
6449         tmpl.metadata = 1;
6450         tmpl.max_size = buf->len;
6451         add_extent_rec(extent_cache, &tmpl);
6452
6453         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6454             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6455                 add_tree_backref(extent_cache, buf->start, buf->start,
6456                                  0, 1);
6457         else
6458                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6459         return 0;
6460 }
6461
6462 /* as we fix the tree, we might be deleting blocks that
6463  * we're tracking for repair.  This hook makes sure we
6464  * remove any backrefs for blocks as we are fixing them.
6465  */
6466 static int free_extent_hook(struct btrfs_trans_handle *trans,
6467                             struct btrfs_root *root,
6468                             u64 bytenr, u64 num_bytes, u64 parent,
6469                             u64 root_objectid, u64 owner, u64 offset,
6470                             int refs_to_drop)
6471 {
6472         struct extent_record *rec;
6473         struct cache_extent *cache;
6474         int is_data;
6475         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6476
6477         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6478         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6479         if (!cache)
6480                 return 0;
6481
6482         rec = container_of(cache, struct extent_record, cache);
6483         if (is_data) {
6484                 struct data_backref *back;
6485                 back = find_data_backref(rec, parent, root_objectid, owner,
6486                                          offset, 1, bytenr, num_bytes);
6487                 if (!back)
6488                         goto out;
6489                 if (back->node.found_ref) {
6490                         back->found_ref -= refs_to_drop;
6491                         if (rec->refs)
6492                                 rec->refs -= refs_to_drop;
6493                 }
6494                 if (back->node.found_extent_tree) {
6495                         back->num_refs -= refs_to_drop;
6496                         if (rec->extent_item_refs)
6497                                 rec->extent_item_refs -= refs_to_drop;
6498                 }
6499                 if (back->found_ref == 0)
6500                         back->node.found_ref = 0;
6501                 if (back->num_refs == 0)
6502                         back->node.found_extent_tree = 0;
6503
6504                 if (!back->node.found_extent_tree && back->node.found_ref) {
6505                         rb_erase(&back->node.node, &rec->backref_tree);
6506                         free(back);
6507                 }
6508         } else {
6509                 struct tree_backref *back;
6510                 back = find_tree_backref(rec, parent, root_objectid);
6511                 if (!back)
6512                         goto out;
6513                 if (back->node.found_ref) {
6514                         if (rec->refs)
6515                                 rec->refs--;
6516                         back->node.found_ref = 0;
6517                 }
6518                 if (back->node.found_extent_tree) {
6519                         if (rec->extent_item_refs)
6520                                 rec->extent_item_refs--;
6521                         back->node.found_extent_tree = 0;
6522                 }
6523                 if (!back->node.found_extent_tree && back->node.found_ref) {
6524                         rb_erase(&back->node.node, &rec->backref_tree);
6525                         free(back);
6526                 }
6527         }
6528         maybe_free_extent_rec(extent_cache, rec);
6529 out:
6530         return 0;
6531 }
6532
6533 static int delete_extent_records(struct btrfs_trans_handle *trans,
6534                                  struct btrfs_root *root,
6535                                  struct btrfs_path *path,
6536                                  u64 bytenr, u64 new_len)
6537 {
6538         struct btrfs_key key;
6539         struct btrfs_key found_key;
6540         struct extent_buffer *leaf;
6541         int ret;
6542         int slot;
6543
6544
6545         key.objectid = bytenr;
6546         key.type = (u8)-1;
6547         key.offset = (u64)-1;
6548
6549         while(1) {
6550                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6551                                         &key, path, 0, 1);
6552                 if (ret < 0)
6553                         break;
6554
6555                 if (ret > 0) {
6556                         ret = 0;
6557                         if (path->slots[0] == 0)
6558                                 break;
6559                         path->slots[0]--;
6560                 }
6561                 ret = 0;
6562
6563                 leaf = path->nodes[0];
6564                 slot = path->slots[0];
6565
6566                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6567                 if (found_key.objectid != bytenr)
6568                         break;
6569
6570                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6571                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6572                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6573                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6574                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6575                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6576                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6577                         btrfs_release_path(path);
6578                         if (found_key.type == 0) {
6579                                 if (found_key.offset == 0)
6580                                         break;
6581                                 key.offset = found_key.offset - 1;
6582                                 key.type = found_key.type;
6583                         }
6584                         key.type = found_key.type - 1;
6585                         key.offset = (u64)-1;
6586                         continue;
6587                 }
6588
6589                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6590                         found_key.objectid, found_key.type, found_key.offset);
6591
6592                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6593                 if (ret)
6594                         break;
6595                 btrfs_release_path(path);
6596
6597                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6598                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6599                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6600                                 found_key.offset : root->nodesize;
6601
6602                         ret = btrfs_update_block_group(trans, root, bytenr,
6603                                                        bytes, 0, 0);
6604                         if (ret)
6605                                 break;
6606                 }
6607         }
6608
6609         btrfs_release_path(path);
6610         return ret;
6611 }
6612
6613 /*
6614  * for a single backref, this will allocate a new extent
6615  * and add the backref to it.
6616  */
6617 static int record_extent(struct btrfs_trans_handle *trans,
6618                          struct btrfs_fs_info *info,
6619                          struct btrfs_path *path,
6620                          struct extent_record *rec,
6621                          struct extent_backref *back,
6622                          int allocated, u64 flags)
6623 {
6624         int ret;
6625         struct btrfs_root *extent_root = info->extent_root;
6626         struct extent_buffer *leaf;
6627         struct btrfs_key ins_key;
6628         struct btrfs_extent_item *ei;
6629         struct tree_backref *tback;
6630         struct data_backref *dback;
6631         struct btrfs_tree_block_info *bi;
6632
6633         if (!back->is_data)
6634                 rec->max_size = max_t(u64, rec->max_size,
6635                                     info->extent_root->nodesize);
6636
6637         if (!allocated) {
6638                 u32 item_size = sizeof(*ei);
6639
6640                 if (!back->is_data)
6641                         item_size += sizeof(*bi);
6642
6643                 ins_key.objectid = rec->start;
6644                 ins_key.offset = rec->max_size;
6645                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6646
6647                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6648                                         &ins_key, item_size);
6649                 if (ret)
6650                         goto fail;
6651
6652                 leaf = path->nodes[0];
6653                 ei = btrfs_item_ptr(leaf, path->slots[0],
6654                                     struct btrfs_extent_item);
6655
6656                 btrfs_set_extent_refs(leaf, ei, 0);
6657                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6658
6659                 if (back->is_data) {
6660                         btrfs_set_extent_flags(leaf, ei,
6661                                                BTRFS_EXTENT_FLAG_DATA);
6662                 } else {
6663                         struct btrfs_disk_key copy_key;;
6664
6665                         tback = to_tree_backref(back);
6666                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6667                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6668                                              sizeof(*bi));
6669
6670                         btrfs_set_disk_key_objectid(&copy_key,
6671                                                     rec->info_objectid);
6672                         btrfs_set_disk_key_type(&copy_key, 0);
6673                         btrfs_set_disk_key_offset(&copy_key, 0);
6674
6675                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6676                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6677
6678                         btrfs_set_extent_flags(leaf, ei,
6679                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6680                 }
6681
6682                 btrfs_mark_buffer_dirty(leaf);
6683                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6684                                                rec->max_size, 1, 0);
6685                 if (ret)
6686                         goto fail;
6687                 btrfs_release_path(path);
6688         }
6689
6690         if (back->is_data) {
6691                 u64 parent;
6692                 int i;
6693
6694                 dback = to_data_backref(back);
6695                 if (back->full_backref)
6696                         parent = dback->parent;
6697                 else
6698                         parent = 0;
6699
6700                 for (i = 0; i < dback->found_ref; i++) {
6701                         /* if parent != 0, we're doing a full backref
6702                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6703                          * just makes the backref allocator create a data
6704                          * backref
6705                          */
6706                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6707                                                    rec->start, rec->max_size,
6708                                                    parent,
6709                                                    dback->root,
6710                                                    parent ?
6711                                                    BTRFS_FIRST_FREE_OBJECTID :
6712                                                    dback->owner,
6713                                                    dback->offset);
6714                         if (ret)
6715                                 break;
6716                 }
6717                 fprintf(stderr, "adding new data backref"
6718                                 " on %llu %s %llu owner %llu"
6719                                 " offset %llu found %d\n",
6720                                 (unsigned long long)rec->start,
6721                                 back->full_backref ?
6722                                 "parent" : "root",
6723                                 back->full_backref ?
6724                                 (unsigned long long)parent :
6725                                 (unsigned long long)dback->root,
6726                                 (unsigned long long)dback->owner,
6727                                 (unsigned long long)dback->offset,
6728                                 dback->found_ref);
6729         } else {
6730                 u64 parent;
6731
6732                 tback = to_tree_backref(back);
6733                 if (back->full_backref)
6734                         parent = tback->parent;
6735                 else
6736                         parent = 0;
6737
6738                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6739                                            rec->start, rec->max_size,
6740                                            parent, tback->root, 0, 0);
6741                 fprintf(stderr, "adding new tree backref on "
6742                         "start %llu len %llu parent %llu root %llu\n",
6743                         rec->start, rec->max_size, parent, tback->root);
6744         }
6745 fail:
6746         btrfs_release_path(path);
6747         return ret;
6748 }
6749
6750 static struct extent_entry *find_entry(struct list_head *entries,
6751                                        u64 bytenr, u64 bytes)
6752 {
6753         struct extent_entry *entry = NULL;
6754
6755         list_for_each_entry(entry, entries, list) {
6756                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6757                         return entry;
6758         }
6759
6760         return NULL;
6761 }
6762
6763 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6764 {
6765         struct extent_entry *entry, *best = NULL, *prev = NULL;
6766
6767         list_for_each_entry(entry, entries, list) {
6768                 if (!prev) {
6769                         prev = entry;
6770                         continue;
6771                 }
6772
6773                 /*
6774                  * If there are as many broken entries as entries then we know
6775                  * not to trust this particular entry.
6776                  */
6777                 if (entry->broken == entry->count)
6778                         continue;
6779
6780                 /*
6781                  * If our current entry == best then we can't be sure our best
6782                  * is really the best, so we need to keep searching.
6783                  */
6784                 if (best && best->count == entry->count) {
6785                         prev = entry;
6786                         best = NULL;
6787                         continue;
6788                 }
6789
6790                 /* Prev == entry, not good enough, have to keep searching */
6791                 if (!prev->broken && prev->count == entry->count)
6792                         continue;
6793
6794                 if (!best)
6795                         best = (prev->count > entry->count) ? prev : entry;
6796                 else if (best->count < entry->count)
6797                         best = entry;
6798                 prev = entry;
6799         }
6800
6801         return best;
6802 }
6803
6804 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6805                       struct data_backref *dback, struct extent_entry *entry)
6806 {
6807         struct btrfs_trans_handle *trans;
6808         struct btrfs_root *root;
6809         struct btrfs_file_extent_item *fi;
6810         struct extent_buffer *leaf;
6811         struct btrfs_key key;
6812         u64 bytenr, bytes;
6813         int ret, err;
6814
6815         key.objectid = dback->root;
6816         key.type = BTRFS_ROOT_ITEM_KEY;
6817         key.offset = (u64)-1;
6818         root = btrfs_read_fs_root(info, &key);
6819         if (IS_ERR(root)) {
6820                 fprintf(stderr, "Couldn't find root for our ref\n");
6821                 return -EINVAL;
6822         }
6823
6824         /*
6825          * The backref points to the original offset of the extent if it was
6826          * split, so we need to search down to the offset we have and then walk
6827          * forward until we find the backref we're looking for.
6828          */
6829         key.objectid = dback->owner;
6830         key.type = BTRFS_EXTENT_DATA_KEY;
6831         key.offset = dback->offset;
6832         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6833         if (ret < 0) {
6834                 fprintf(stderr, "Error looking up ref %d\n", ret);
6835                 return ret;
6836         }
6837
6838         while (1) {
6839                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6840                         ret = btrfs_next_leaf(root, path);
6841                         if (ret) {
6842                                 fprintf(stderr, "Couldn't find our ref, next\n");
6843                                 return -EINVAL;
6844                         }
6845                 }
6846                 leaf = path->nodes[0];
6847                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6848                 if (key.objectid != dback->owner ||
6849                     key.type != BTRFS_EXTENT_DATA_KEY) {
6850                         fprintf(stderr, "Couldn't find our ref, search\n");
6851                         return -EINVAL;
6852                 }
6853                 fi = btrfs_item_ptr(leaf, path->slots[0],
6854                                     struct btrfs_file_extent_item);
6855                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6856                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6857
6858                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6859                         break;
6860                 path->slots[0]++;
6861         }
6862
6863         btrfs_release_path(path);
6864
6865         trans = btrfs_start_transaction(root, 1);
6866         if (IS_ERR(trans))
6867                 return PTR_ERR(trans);
6868
6869         /*
6870          * Ok we have the key of the file extent we want to fix, now we can cow
6871          * down to the thing and fix it.
6872          */
6873         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6874         if (ret < 0) {
6875                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6876                         key.objectid, key.type, key.offset, ret);
6877                 goto out;
6878         }
6879         if (ret > 0) {
6880                 fprintf(stderr, "Well that's odd, we just found this key "
6881                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6882                         key.offset);
6883                 ret = -EINVAL;
6884                 goto out;
6885         }
6886         leaf = path->nodes[0];
6887         fi = btrfs_item_ptr(leaf, path->slots[0],
6888                             struct btrfs_file_extent_item);
6889
6890         if (btrfs_file_extent_compression(leaf, fi) &&
6891             dback->disk_bytenr != entry->bytenr) {
6892                 fprintf(stderr, "Ref doesn't match the record start and is "
6893                         "compressed, please take a btrfs-image of this file "
6894                         "system and send it to a btrfs developer so they can "
6895                         "complete this functionality for bytenr %Lu\n",
6896                         dback->disk_bytenr);
6897                 ret = -EINVAL;
6898                 goto out;
6899         }
6900
6901         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6902                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6903         } else if (dback->disk_bytenr > entry->bytenr) {
6904                 u64 off_diff, offset;
6905
6906                 off_diff = dback->disk_bytenr - entry->bytenr;
6907                 offset = btrfs_file_extent_offset(leaf, fi);
6908                 if (dback->disk_bytenr + offset +
6909                     btrfs_file_extent_num_bytes(leaf, fi) >
6910                     entry->bytenr + entry->bytes) {
6911                         fprintf(stderr, "Ref is past the entry end, please "
6912                                 "take a btrfs-image of this file system and "
6913                                 "send it to a btrfs developer, ref %Lu\n",
6914                                 dback->disk_bytenr);
6915                         ret = -EINVAL;
6916                         goto out;
6917                 }
6918                 offset += off_diff;
6919                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6920                 btrfs_set_file_extent_offset(leaf, fi, offset);
6921         } else if (dback->disk_bytenr < entry->bytenr) {
6922                 u64 offset;
6923
6924                 offset = btrfs_file_extent_offset(leaf, fi);
6925                 if (dback->disk_bytenr + offset < entry->bytenr) {
6926                         fprintf(stderr, "Ref is before the entry start, please"
6927                                 " take a btrfs-image of this file system and "
6928                                 "send it to a btrfs developer, ref %Lu\n",
6929                                 dback->disk_bytenr);
6930                         ret = -EINVAL;
6931                         goto out;
6932                 }
6933
6934                 offset += dback->disk_bytenr;
6935                 offset -= entry->bytenr;
6936                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6937                 btrfs_set_file_extent_offset(leaf, fi, offset);
6938         }
6939
6940         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6941
6942         /*
6943          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6944          * only do this if we aren't using compression, otherwise it's a
6945          * trickier case.
6946          */
6947         if (!btrfs_file_extent_compression(leaf, fi))
6948                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6949         else
6950                 printf("ram bytes may be wrong?\n");
6951         btrfs_mark_buffer_dirty(leaf);
6952 out:
6953         err = btrfs_commit_transaction(trans, root);
6954         btrfs_release_path(path);
6955         return ret ? ret : err;
6956 }
6957
6958 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6959                            struct extent_record *rec)
6960 {
6961         struct extent_backref *back, *tmp;
6962         struct data_backref *dback;
6963         struct extent_entry *entry, *best = NULL;
6964         LIST_HEAD(entries);
6965         int nr_entries = 0;
6966         int broken_entries = 0;
6967         int ret = 0;
6968         short mismatch = 0;
6969
6970         /*
6971          * Metadata is easy and the backrefs should always agree on bytenr and
6972          * size, if not we've got bigger issues.
6973          */
6974         if (rec->metadata)
6975                 return 0;
6976
6977         rbtree_postorder_for_each_entry_safe(back, tmp,
6978                                              &rec->backref_tree, node) {
6979                 if (back->full_backref || !back->is_data)
6980                         continue;
6981
6982                 dback = to_data_backref(back);
6983
6984                 /*
6985                  * We only pay attention to backrefs that we found a real
6986                  * backref for.
6987                  */
6988                 if (dback->found_ref == 0)
6989                         continue;
6990
6991                 /*
6992                  * For now we only catch when the bytes don't match, not the
6993                  * bytenr.  We can easily do this at the same time, but I want
6994                  * to have a fs image to test on before we just add repair
6995                  * functionality willy-nilly so we know we won't screw up the
6996                  * repair.
6997                  */
6998
6999                 entry = find_entry(&entries, dback->disk_bytenr,
7000                                    dback->bytes);
7001                 if (!entry) {
7002                         entry = malloc(sizeof(struct extent_entry));
7003                         if (!entry) {
7004                                 ret = -ENOMEM;
7005                                 goto out;
7006                         }
7007                         memset(entry, 0, sizeof(*entry));
7008                         entry->bytenr = dback->disk_bytenr;
7009                         entry->bytes = dback->bytes;
7010                         list_add_tail(&entry->list, &entries);
7011                         nr_entries++;
7012                 }
7013
7014                 /*
7015                  * If we only have on entry we may think the entries agree when
7016                  * in reality they don't so we have to do some extra checking.
7017                  */
7018                 if (dback->disk_bytenr != rec->start ||
7019                     dback->bytes != rec->nr || back->broken)
7020                         mismatch = 1;
7021
7022                 if (back->broken) {
7023                         entry->broken++;
7024                         broken_entries++;
7025                 }
7026
7027                 entry->count++;
7028         }
7029
7030         /* Yay all the backrefs agree, carry on good sir */
7031         if (nr_entries <= 1 && !mismatch)
7032                 goto out;
7033
7034         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7035                 "%Lu\n", rec->start);
7036
7037         /*
7038          * First we want to see if the backrefs can agree amongst themselves who
7039          * is right, so figure out which one of the entries has the highest
7040          * count.
7041          */
7042         best = find_most_right_entry(&entries);
7043
7044         /*
7045          * Ok so we may have an even split between what the backrefs think, so
7046          * this is where we use the extent ref to see what it thinks.
7047          */
7048         if (!best) {
7049                 entry = find_entry(&entries, rec->start, rec->nr);
7050                 if (!entry && (!broken_entries || !rec->found_rec)) {
7051                         fprintf(stderr, "Backrefs don't agree with each other "
7052                                 "and extent record doesn't agree with anybody,"
7053                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7054                                 rec->start, rec->nr);
7055                         ret = -EINVAL;
7056                         goto out;
7057                 } else if (!entry) {
7058                         /*
7059                          * Ok our backrefs were broken, we'll assume this is the
7060                          * correct value and add an entry for this range.
7061                          */
7062                         entry = malloc(sizeof(struct extent_entry));
7063                         if (!entry) {
7064                                 ret = -ENOMEM;
7065                                 goto out;
7066                         }
7067                         memset(entry, 0, sizeof(*entry));
7068                         entry->bytenr = rec->start;
7069                         entry->bytes = rec->nr;
7070                         list_add_tail(&entry->list, &entries);
7071                         nr_entries++;
7072                 }
7073                 entry->count++;
7074                 best = find_most_right_entry(&entries);
7075                 if (!best) {
7076                         fprintf(stderr, "Backrefs and extent record evenly "
7077                                 "split on who is right, this is going to "
7078                                 "require user input to fix bytenr %Lu bytes "
7079                                 "%Lu\n", rec->start, rec->nr);
7080                         ret = -EINVAL;
7081                         goto out;
7082                 }
7083         }
7084
7085         /*
7086          * I don't think this can happen currently as we'll abort() if we catch
7087          * this case higher up, but in case somebody removes that we still can't
7088          * deal with it properly here yet, so just bail out of that's the case.
7089          */
7090         if (best->bytenr != rec->start) {
7091                 fprintf(stderr, "Extent start and backref starts don't match, "
7092                         "please use btrfs-image on this file system and send "
7093                         "it to a btrfs developer so they can make fsck fix "
7094                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7095                         rec->start, rec->nr);
7096                 ret = -EINVAL;
7097                 goto out;
7098         }
7099
7100         /*
7101          * Ok great we all agreed on an extent record, let's go find the real
7102          * references and fix up the ones that don't match.
7103          */
7104         rbtree_postorder_for_each_entry_safe(back, tmp,
7105                                              &rec->backref_tree, node) {
7106                 if (back->full_backref || !back->is_data)
7107                         continue;
7108
7109                 dback = to_data_backref(back);
7110
7111                 /*
7112                  * Still ignoring backrefs that don't have a real ref attached
7113                  * to them.
7114                  */
7115                 if (dback->found_ref == 0)
7116                         continue;
7117
7118                 if (dback->bytes == best->bytes &&
7119                     dback->disk_bytenr == best->bytenr)
7120                         continue;
7121
7122                 ret = repair_ref(info, path, dback, best);
7123                 if (ret)
7124                         goto out;
7125         }
7126
7127         /*
7128          * Ok we messed with the actual refs, which means we need to drop our
7129          * entire cache and go back and rescan.  I know this is a huge pain and
7130          * adds a lot of extra work, but it's the only way to be safe.  Once all
7131          * the backrefs agree we may not need to do anything to the extent
7132          * record itself.
7133          */
7134         ret = -EAGAIN;
7135 out:
7136         while (!list_empty(&entries)) {
7137                 entry = list_entry(entries.next, struct extent_entry, list);
7138                 list_del_init(&entry->list);
7139                 free(entry);
7140         }
7141         return ret;
7142 }
7143
7144 static int process_duplicates(struct btrfs_root *root,
7145                               struct cache_tree *extent_cache,
7146                               struct extent_record *rec)
7147 {
7148         struct extent_record *good, *tmp;
7149         struct cache_extent *cache;
7150         int ret;
7151
7152         /*
7153          * If we found a extent record for this extent then return, or if we
7154          * have more than one duplicate we are likely going to need to delete
7155          * something.
7156          */
7157         if (rec->found_rec || rec->num_duplicates > 1)
7158                 return 0;
7159
7160         /* Shouldn't happen but just in case */
7161         BUG_ON(!rec->num_duplicates);
7162
7163         /*
7164          * So this happens if we end up with a backref that doesn't match the
7165          * actual extent entry.  So either the backref is bad or the extent
7166          * entry is bad.  Either way we want to have the extent_record actually
7167          * reflect what we found in the extent_tree, so we need to take the
7168          * duplicate out and use that as the extent_record since the only way we
7169          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7170          */
7171         remove_cache_extent(extent_cache, &rec->cache);
7172
7173         good = to_extent_record(rec->dups.next);
7174         list_del_init(&good->list);
7175         INIT_LIST_HEAD(&good->backrefs);
7176         INIT_LIST_HEAD(&good->dups);
7177         good->cache.start = good->start;
7178         good->cache.size = good->nr;
7179         good->content_checked = 0;
7180         good->owner_ref_checked = 0;
7181         good->num_duplicates = 0;
7182         good->refs = rec->refs;
7183         list_splice_init(&rec->backrefs, &good->backrefs);
7184         while (1) {
7185                 cache = lookup_cache_extent(extent_cache, good->start,
7186                                             good->nr);
7187                 if (!cache)
7188                         break;
7189                 tmp = container_of(cache, struct extent_record, cache);
7190
7191                 /*
7192                  * If we find another overlapping extent and it's found_rec is
7193                  * set then it's a duplicate and we need to try and delete
7194                  * something.
7195                  */
7196                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7197                         if (list_empty(&good->list))
7198                                 list_add_tail(&good->list,
7199                                               &duplicate_extents);
7200                         good->num_duplicates += tmp->num_duplicates + 1;
7201                         list_splice_init(&tmp->dups, &good->dups);
7202                         list_del_init(&tmp->list);
7203                         list_add_tail(&tmp->list, &good->dups);
7204                         remove_cache_extent(extent_cache, &tmp->cache);
7205                         continue;
7206                 }
7207
7208                 /*
7209                  * Ok we have another non extent item backed extent rec, so lets
7210                  * just add it to this extent and carry on like we did above.
7211                  */
7212                 good->refs += tmp->refs;
7213                 list_splice_init(&tmp->backrefs, &good->backrefs);
7214                 remove_cache_extent(extent_cache, &tmp->cache);
7215                 free(tmp);
7216         }
7217         ret = insert_cache_extent(extent_cache, &good->cache);
7218         BUG_ON(ret);
7219         free(rec);
7220         return good->num_duplicates ? 0 : 1;
7221 }
7222
7223 static int delete_duplicate_records(struct btrfs_root *root,
7224                                     struct extent_record *rec)
7225 {
7226         struct btrfs_trans_handle *trans;
7227         LIST_HEAD(delete_list);
7228         struct btrfs_path *path;
7229         struct extent_record *tmp, *good, *n;
7230         int nr_del = 0;
7231         int ret = 0, err;
7232         struct btrfs_key key;
7233
7234         path = btrfs_alloc_path();
7235         if (!path) {
7236                 ret = -ENOMEM;
7237                 goto out;
7238         }
7239
7240         good = rec;
7241         /* Find the record that covers all of the duplicates. */
7242         list_for_each_entry(tmp, &rec->dups, list) {
7243                 if (good->start < tmp->start)
7244                         continue;
7245                 if (good->nr > tmp->nr)
7246                         continue;
7247
7248                 if (tmp->start + tmp->nr < good->start + good->nr) {
7249                         fprintf(stderr, "Ok we have overlapping extents that "
7250                                 "aren't completely covered by each other, this "
7251                                 "is going to require more careful thought.  "
7252                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7253                                 tmp->start, tmp->nr, good->start, good->nr);
7254                         abort();
7255                 }
7256                 good = tmp;
7257         }
7258
7259         if (good != rec)
7260                 list_add_tail(&rec->list, &delete_list);
7261
7262         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7263                 if (tmp == good)
7264                         continue;
7265                 list_move_tail(&tmp->list, &delete_list);
7266         }
7267
7268         root = root->fs_info->extent_root;
7269         trans = btrfs_start_transaction(root, 1);
7270         if (IS_ERR(trans)) {
7271                 ret = PTR_ERR(trans);
7272                 goto out;
7273         }
7274
7275         list_for_each_entry(tmp, &delete_list, list) {
7276                 if (tmp->found_rec == 0)
7277                         continue;
7278                 key.objectid = tmp->start;
7279                 key.type = BTRFS_EXTENT_ITEM_KEY;
7280                 key.offset = tmp->nr;
7281
7282                 /* Shouldn't happen but just in case */
7283                 if (tmp->metadata) {
7284                         fprintf(stderr, "Well this shouldn't happen, extent "
7285                                 "record overlaps but is metadata? "
7286                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7287                         abort();
7288                 }
7289
7290                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7291                 if (ret) {
7292                         if (ret > 0)
7293                                 ret = -EINVAL;
7294                         break;
7295                 }
7296                 ret = btrfs_del_item(trans, root, path);
7297                 if (ret)
7298                         break;
7299                 btrfs_release_path(path);
7300                 nr_del++;
7301         }
7302         err = btrfs_commit_transaction(trans, root);
7303         if (err && !ret)
7304                 ret = err;
7305 out:
7306         while (!list_empty(&delete_list)) {
7307                 tmp = to_extent_record(delete_list.next);
7308                 list_del_init(&tmp->list);
7309                 if (tmp == rec)
7310                         continue;
7311                 free(tmp);
7312         }
7313
7314         while (!list_empty(&rec->dups)) {
7315                 tmp = to_extent_record(rec->dups.next);
7316                 list_del_init(&tmp->list);
7317                 free(tmp);
7318         }
7319
7320         btrfs_free_path(path);
7321
7322         if (!ret && !nr_del)
7323                 rec->num_duplicates = 0;
7324
7325         return ret ? ret : nr_del;
7326 }
7327
7328 static int find_possible_backrefs(struct btrfs_fs_info *info,
7329                                   struct btrfs_path *path,
7330                                   struct cache_tree *extent_cache,
7331                                   struct extent_record *rec)
7332 {
7333         struct btrfs_root *root;
7334         struct extent_backref *back, *tmp;
7335         struct data_backref *dback;
7336         struct cache_extent *cache;
7337         struct btrfs_file_extent_item *fi;
7338         struct btrfs_key key;
7339         u64 bytenr, bytes;
7340         int ret;
7341
7342         rbtree_postorder_for_each_entry_safe(back, tmp,
7343                                              &rec->backref_tree, node) {
7344                 /* Don't care about full backrefs (poor unloved backrefs) */
7345                 if (back->full_backref || !back->is_data)
7346                         continue;
7347
7348                 dback = to_data_backref(back);
7349
7350                 /* We found this one, we don't need to do a lookup */
7351                 if (dback->found_ref)
7352                         continue;
7353
7354                 key.objectid = dback->root;
7355                 key.type = BTRFS_ROOT_ITEM_KEY;
7356                 key.offset = (u64)-1;
7357
7358                 root = btrfs_read_fs_root(info, &key);
7359
7360                 /* No root, definitely a bad ref, skip */
7361                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7362                         continue;
7363                 /* Other err, exit */
7364                 if (IS_ERR(root))
7365                         return PTR_ERR(root);
7366
7367                 key.objectid = dback->owner;
7368                 key.type = BTRFS_EXTENT_DATA_KEY;
7369                 key.offset = dback->offset;
7370                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7371                 if (ret) {
7372                         btrfs_release_path(path);
7373                         if (ret < 0)
7374                                 return ret;
7375                         /* Didn't find it, we can carry on */
7376                         ret = 0;
7377                         continue;
7378                 }
7379
7380                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7381                                     struct btrfs_file_extent_item);
7382                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7383                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7384                 btrfs_release_path(path);
7385                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7386                 if (cache) {
7387                         struct extent_record *tmp;
7388                         tmp = container_of(cache, struct extent_record, cache);
7389
7390                         /*
7391                          * If we found an extent record for the bytenr for this
7392                          * particular backref then we can't add it to our
7393                          * current extent record.  We only want to add backrefs
7394                          * that don't have a corresponding extent item in the
7395                          * extent tree since they likely belong to this record
7396                          * and we need to fix it if it doesn't match bytenrs.
7397                          */
7398                         if  (tmp->found_rec)
7399                                 continue;
7400                 }
7401
7402                 dback->found_ref += 1;
7403                 dback->disk_bytenr = bytenr;
7404                 dback->bytes = bytes;
7405
7406                 /*
7407                  * Set this so the verify backref code knows not to trust the
7408                  * values in this backref.
7409                  */
7410                 back->broken = 1;
7411         }
7412
7413         return 0;
7414 }
7415
7416 /*
7417  * Record orphan data ref into corresponding root.
7418  *
7419  * Return 0 if the extent item contains data ref and recorded.
7420  * Return 1 if the extent item contains no useful data ref
7421  *   On that case, it may contains only shared_dataref or metadata backref
7422  *   or the file extent exists(this should be handled by the extent bytenr
7423  *   recovery routine)
7424  * Return <0 if something goes wrong.
7425  */
7426 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7427                                       struct extent_record *rec)
7428 {
7429         struct btrfs_key key;
7430         struct btrfs_root *dest_root;
7431         struct extent_backref *back, *tmp;
7432         struct data_backref *dback;
7433         struct orphan_data_extent *orphan;
7434         struct btrfs_path *path;
7435         int recorded_data_ref = 0;
7436         int ret = 0;
7437
7438         if (rec->metadata)
7439                 return 1;
7440         path = btrfs_alloc_path();
7441         if (!path)
7442                 return -ENOMEM;
7443         rbtree_postorder_for_each_entry_safe(back, tmp,
7444                                              &rec->backref_tree, node) {
7445                 if (back->full_backref || !back->is_data ||
7446                     !back->found_extent_tree)
7447                         continue;
7448                 dback = to_data_backref(back);
7449                 if (dback->found_ref)
7450                         continue;
7451                 key.objectid = dback->root;
7452                 key.type = BTRFS_ROOT_ITEM_KEY;
7453                 key.offset = (u64)-1;
7454
7455                 dest_root = btrfs_read_fs_root(fs_info, &key);
7456
7457                 /* For non-exist root we just skip it */
7458                 if (IS_ERR(dest_root) || !dest_root)
7459                         continue;
7460
7461                 key.objectid = dback->owner;
7462                 key.type = BTRFS_EXTENT_DATA_KEY;
7463                 key.offset = dback->offset;
7464
7465                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7466                 /*
7467                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7468                  * we need to record it for inode/file extent rebuild.
7469                  * For ret > 0, we record it only for file extent rebuild.
7470                  * For ret == 0, the file extent exists but only bytenr
7471                  * mismatch, let the original bytenr fix routine to handle,
7472                  * don't record it.
7473                  */
7474                 if (ret == 0)
7475                         continue;
7476                 ret = 0;
7477                 orphan = malloc(sizeof(*orphan));
7478                 if (!orphan) {
7479                         ret = -ENOMEM;
7480                         goto out;
7481                 }
7482                 INIT_LIST_HEAD(&orphan->list);
7483                 orphan->root = dback->root;
7484                 orphan->objectid = dback->owner;
7485                 orphan->offset = dback->offset;
7486                 orphan->disk_bytenr = rec->cache.start;
7487                 orphan->disk_len = rec->cache.size;
7488                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7489                 recorded_data_ref = 1;
7490         }
7491 out:
7492         btrfs_free_path(path);
7493         if (!ret)
7494                 return !recorded_data_ref;
7495         else
7496                 return ret;
7497 }
7498
7499 /*
7500  * when an incorrect extent item is found, this will delete
7501  * all of the existing entries for it and recreate them
7502  * based on what the tree scan found.
7503  */
7504 static int fixup_extent_refs(struct btrfs_fs_info *info,
7505                              struct cache_tree *extent_cache,
7506                              struct extent_record *rec)
7507 {
7508         struct btrfs_trans_handle *trans = NULL;
7509         int ret;
7510         struct btrfs_path *path;
7511         struct cache_extent *cache;
7512         struct extent_backref *back, *tmp;
7513         int allocated = 0;
7514         u64 flags = 0;
7515
7516         if (rec->flag_block_full_backref)
7517                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7518
7519         path = btrfs_alloc_path();
7520         if (!path)
7521                 return -ENOMEM;
7522
7523         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7524                 /*
7525                  * Sometimes the backrefs themselves are so broken they don't
7526                  * get attached to any meaningful rec, so first go back and
7527                  * check any of our backrefs that we couldn't find and throw
7528                  * them into the list if we find the backref so that
7529                  * verify_backrefs can figure out what to do.
7530                  */
7531                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7532                 if (ret < 0)
7533                         goto out;
7534         }
7535
7536         /* step one, make sure all of the backrefs agree */
7537         ret = verify_backrefs(info, path, rec);
7538         if (ret < 0)
7539                 goto out;
7540
7541         trans = btrfs_start_transaction(info->extent_root, 1);
7542         if (IS_ERR(trans)) {
7543                 ret = PTR_ERR(trans);
7544                 goto out;
7545         }
7546
7547         /* step two, delete all the existing records */
7548         ret = delete_extent_records(trans, info->extent_root, path,
7549                                     rec->start, rec->max_size);
7550
7551         if (ret < 0)
7552                 goto out;
7553
7554         /* was this block corrupt?  If so, don't add references to it */
7555         cache = lookup_cache_extent(info->corrupt_blocks,
7556                                     rec->start, rec->max_size);
7557         if (cache) {
7558                 ret = 0;
7559                 goto out;
7560         }
7561
7562         /* step three, recreate all the refs we did find */
7563         rbtree_postorder_for_each_entry_safe(back, tmp,
7564                                              &rec->backref_tree, node) {
7565                 /*
7566                  * if we didn't find any references, don't create a
7567                  * new extent record
7568                  */
7569                 if (!back->found_ref)
7570                         continue;
7571
7572                 rec->bad_full_backref = 0;
7573                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7574                 allocated = 1;
7575
7576                 if (ret)
7577                         goto out;
7578         }
7579 out:
7580         if (trans) {
7581                 int err = btrfs_commit_transaction(trans, info->extent_root);
7582                 if (!ret)
7583                         ret = err;
7584         }
7585
7586         btrfs_free_path(path);
7587         return ret;
7588 }
7589
7590 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7591                               struct extent_record *rec)
7592 {
7593         struct btrfs_trans_handle *trans;
7594         struct btrfs_root *root = fs_info->extent_root;
7595         struct btrfs_path *path;
7596         struct btrfs_extent_item *ei;
7597         struct btrfs_key key;
7598         u64 flags;
7599         int ret = 0;
7600
7601         key.objectid = rec->start;
7602         if (rec->metadata) {
7603                 key.type = BTRFS_METADATA_ITEM_KEY;
7604                 key.offset = rec->info_level;
7605         } else {
7606                 key.type = BTRFS_EXTENT_ITEM_KEY;
7607                 key.offset = rec->max_size;
7608         }
7609
7610         path = btrfs_alloc_path();
7611         if (!path)
7612                 return -ENOMEM;
7613
7614         trans = btrfs_start_transaction(root, 0);
7615         if (IS_ERR(trans)) {
7616                 btrfs_free_path(path);
7617                 return PTR_ERR(trans);
7618         }
7619
7620         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7621         if (ret < 0) {
7622                 btrfs_free_path(path);
7623                 btrfs_commit_transaction(trans, root);
7624                 return ret;
7625         } else if (ret) {
7626                 fprintf(stderr, "Didn't find extent for %llu\n",
7627                         (unsigned long long)rec->start);
7628                 btrfs_free_path(path);
7629                 btrfs_commit_transaction(trans, root);
7630                 return -ENOENT;
7631         }
7632
7633         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7634                             struct btrfs_extent_item);
7635         flags = btrfs_extent_flags(path->nodes[0], ei);
7636         if (rec->flag_block_full_backref) {
7637                 fprintf(stderr, "setting full backref on %llu\n",
7638                         (unsigned long long)key.objectid);
7639                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7640         } else {
7641                 fprintf(stderr, "clearing full backref on %llu\n",
7642                         (unsigned long long)key.objectid);
7643                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7644         }
7645         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7646         btrfs_mark_buffer_dirty(path->nodes[0]);
7647         btrfs_free_path(path);
7648         return btrfs_commit_transaction(trans, root);
7649 }
7650
7651 /* right now we only prune from the extent allocation tree */
7652 static int prune_one_block(struct btrfs_trans_handle *trans,
7653                            struct btrfs_fs_info *info,
7654                            struct btrfs_corrupt_block *corrupt)
7655 {
7656         int ret;
7657         struct btrfs_path path;
7658         struct extent_buffer *eb;
7659         u64 found;
7660         int slot;
7661         int nritems;
7662         int level = corrupt->level + 1;
7663
7664         btrfs_init_path(&path);
7665 again:
7666         /* we want to stop at the parent to our busted block */
7667         path.lowest_level = level;
7668
7669         ret = btrfs_search_slot(trans, info->extent_root,
7670                                 &corrupt->key, &path, -1, 1);
7671
7672         if (ret < 0)
7673                 goto out;
7674
7675         eb = path.nodes[level];
7676         if (!eb) {
7677                 ret = -ENOENT;
7678                 goto out;
7679         }
7680
7681         /*
7682          * hopefully the search gave us the block we want to prune,
7683          * lets try that first
7684          */
7685         slot = path.slots[level];
7686         found =  btrfs_node_blockptr(eb, slot);
7687         if (found == corrupt->cache.start)
7688                 goto del_ptr;
7689
7690         nritems = btrfs_header_nritems(eb);
7691
7692         /* the search failed, lets scan this node and hope we find it */
7693         for (slot = 0; slot < nritems; slot++) {
7694                 found =  btrfs_node_blockptr(eb, slot);
7695                 if (found == corrupt->cache.start)
7696                         goto del_ptr;
7697         }
7698         /*
7699          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7700          * to this block
7701          */
7702         if (eb == info->extent_root->node) {
7703                 ret = -ENOENT;
7704                 goto out;
7705         } else {
7706                 level++;
7707                 btrfs_release_path(&path);
7708                 goto again;
7709         }
7710
7711 del_ptr:
7712         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7713         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7714
7715 out:
7716         btrfs_release_path(&path);
7717         return ret;
7718 }
7719
7720 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7721 {
7722         struct btrfs_trans_handle *trans = NULL;
7723         struct cache_extent *cache;
7724         struct btrfs_corrupt_block *corrupt;
7725
7726         while (1) {
7727                 cache = search_cache_extent(info->corrupt_blocks, 0);
7728                 if (!cache)
7729                         break;
7730                 if (!trans) {
7731                         trans = btrfs_start_transaction(info->extent_root, 1);
7732                         if (IS_ERR(trans))
7733                                 return PTR_ERR(trans);
7734                 }
7735                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7736                 prune_one_block(trans, info, corrupt);
7737                 remove_cache_extent(info->corrupt_blocks, cache);
7738         }
7739         if (trans)
7740                 return btrfs_commit_transaction(trans, info->extent_root);
7741         return 0;
7742 }
7743
7744 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7745 {
7746         struct btrfs_block_group_cache *cache;
7747         u64 start, end;
7748         int ret;
7749
7750         while (1) {
7751                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7752                                             &start, &end, EXTENT_DIRTY);
7753                 if (ret)
7754                         break;
7755                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7756                                    GFP_NOFS);
7757         }
7758
7759         start = 0;
7760         while (1) {
7761                 cache = btrfs_lookup_first_block_group(fs_info, start);
7762                 if (!cache)
7763                         break;
7764                 if (cache->cached)
7765                         cache->cached = 0;
7766                 start = cache->key.objectid + cache->key.offset;
7767         }
7768 }
7769
7770 static int check_extent_refs(struct btrfs_root *root,
7771                              struct cache_tree *extent_cache)
7772 {
7773         struct extent_record *rec;
7774         struct cache_extent *cache;
7775         int err = 0;
7776         int ret = 0;
7777         int fixed = 0;
7778         int had_dups = 0;
7779         int recorded = 0;
7780
7781         if (repair) {
7782                 /*
7783                  * if we're doing a repair, we have to make sure
7784                  * we don't allocate from the problem extents.
7785                  * In the worst case, this will be all the
7786                  * extents in the FS
7787                  */
7788                 cache = search_cache_extent(extent_cache, 0);
7789                 while(cache) {
7790                         rec = container_of(cache, struct extent_record, cache);
7791                         set_extent_dirty(root->fs_info->excluded_extents,
7792                                          rec->start,
7793                                          rec->start + rec->max_size - 1,
7794                                          GFP_NOFS);
7795                         cache = next_cache_extent(cache);
7796                 }
7797
7798                 /* pin down all the corrupted blocks too */
7799                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7800                 while(cache) {
7801                         set_extent_dirty(root->fs_info->excluded_extents,
7802                                          cache->start,
7803                                          cache->start + cache->size - 1,
7804                                          GFP_NOFS);
7805                         cache = next_cache_extent(cache);
7806                 }
7807                 prune_corrupt_blocks(root->fs_info);
7808                 reset_cached_block_groups(root->fs_info);
7809         }
7810
7811         reset_cached_block_groups(root->fs_info);
7812
7813         /*
7814          * We need to delete any duplicate entries we find first otherwise we
7815          * could mess up the extent tree when we have backrefs that actually
7816          * belong to a different extent item and not the weird duplicate one.
7817          */
7818         while (repair && !list_empty(&duplicate_extents)) {
7819                 rec = to_extent_record(duplicate_extents.next);
7820                 list_del_init(&rec->list);
7821
7822                 /* Sometimes we can find a backref before we find an actual
7823                  * extent, so we need to process it a little bit to see if there
7824                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7825                  * if this is a backref screwup.  If we need to delete stuff
7826                  * process_duplicates() will return 0, otherwise it will return
7827                  * 1 and we
7828                  */
7829                 if (process_duplicates(root, extent_cache, rec))
7830                         continue;
7831                 ret = delete_duplicate_records(root, rec);
7832                 if (ret < 0)
7833                         return ret;
7834                 /*
7835                  * delete_duplicate_records will return the number of entries
7836                  * deleted, so if it's greater than 0 then we know we actually
7837                  * did something and we need to remove.
7838                  */
7839                 if (ret)
7840                         had_dups = 1;
7841         }
7842
7843         if (had_dups)
7844                 return -EAGAIN;
7845
7846         while(1) {
7847                 int cur_err = 0;
7848
7849                 fixed = 0;
7850                 recorded = 0;
7851                 cache = search_cache_extent(extent_cache, 0);
7852                 if (!cache)
7853                         break;
7854                 rec = container_of(cache, struct extent_record, cache);
7855                 if (rec->num_duplicates) {
7856                         fprintf(stderr, "extent item %llu has multiple extent "
7857                                 "items\n", (unsigned long long)rec->start);
7858                         err = 1;
7859                         cur_err = 1;
7860                 }
7861
7862                 if (rec->refs != rec->extent_item_refs) {
7863                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7864                                 (unsigned long long)rec->start,
7865                                 (unsigned long long)rec->nr);
7866                         fprintf(stderr, "extent item %llu, found %llu\n",
7867                                 (unsigned long long)rec->extent_item_refs,
7868                                 (unsigned long long)rec->refs);
7869                         ret = record_orphan_data_extents(root->fs_info, rec);
7870                         if (ret < 0)
7871                                 goto repair_abort;
7872                         if (ret == 0) {
7873                                 recorded = 1;
7874                         } else {
7875                                 /*
7876                                  * we can't use the extent to repair file
7877                                  * extent, let the fallback method handle it.
7878                                  */
7879                                 if (!fixed && repair) {
7880                                         ret = fixup_extent_refs(
7881                                                         root->fs_info,
7882                                                         extent_cache, rec);
7883                                         if (ret)
7884                                                 goto repair_abort;
7885                                         fixed = 1;
7886                                 }
7887                         }
7888                         err = 1;
7889                         cur_err = 1;
7890                 }
7891                 if (all_backpointers_checked(rec, 1)) {
7892                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7893                                 (unsigned long long)rec->start,
7894                                 (unsigned long long)rec->nr);
7895
7896                         if (!fixed && !recorded && repair) {
7897                                 ret = fixup_extent_refs(root->fs_info,
7898                                                         extent_cache, rec);
7899                                 if (ret)
7900                                         goto repair_abort;
7901                                 fixed = 1;
7902                         }
7903                         cur_err = 1;
7904                         err = 1;
7905                 }
7906                 if (!rec->owner_ref_checked) {
7907                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7908                                 (unsigned long long)rec->start,
7909                                 (unsigned long long)rec->nr);
7910                         if (!fixed && !recorded && repair) {
7911                                 ret = fixup_extent_refs(root->fs_info,
7912                                                         extent_cache, rec);
7913                                 if (ret)
7914                                         goto repair_abort;
7915                                 fixed = 1;
7916                         }
7917                         err = 1;
7918                         cur_err = 1;
7919                 }
7920                 if (rec->bad_full_backref) {
7921                         fprintf(stderr, "bad full backref, on [%llu]\n",
7922                                 (unsigned long long)rec->start);
7923                         if (repair) {
7924                                 ret = fixup_extent_flags(root->fs_info, rec);
7925                                 if (ret)
7926                                         goto repair_abort;
7927                                 fixed = 1;
7928                         }
7929                         err = 1;
7930                         cur_err = 1;
7931                 }
7932                 /*
7933                  * Although it's not a extent ref's problem, we reuse this
7934                  * routine for error reporting.
7935                  * No repair function yet.
7936                  */
7937                 if (rec->crossing_stripes) {
7938                         fprintf(stderr,
7939                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7940                                 rec->start, rec->start + rec->max_size);
7941                         err = 1;
7942                         cur_err = 1;
7943                 }
7944
7945                 if (rec->wrong_chunk_type) {
7946                         fprintf(stderr,
7947                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7948                                 rec->start, rec->start + rec->max_size);
7949                         err = 1;
7950                         cur_err = 1;
7951                 }
7952
7953                 remove_cache_extent(extent_cache, cache);
7954                 free_all_extent_backrefs(rec);
7955                 if (!init_extent_tree && repair && (!cur_err || fixed))
7956                         clear_extent_dirty(root->fs_info->excluded_extents,
7957                                            rec->start,
7958                                            rec->start + rec->max_size - 1,
7959                                            GFP_NOFS);
7960                 free(rec);
7961         }
7962 repair_abort:
7963         if (repair) {
7964                 if (ret && ret != -EAGAIN) {
7965                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7966                         exit(1);
7967                 } else if (!ret) {
7968                         struct btrfs_trans_handle *trans;
7969
7970                         root = root->fs_info->extent_root;
7971                         trans = btrfs_start_transaction(root, 1);
7972                         if (IS_ERR(trans)) {
7973                                 ret = PTR_ERR(trans);
7974                                 goto repair_abort;
7975                         }
7976
7977                         btrfs_fix_block_accounting(trans, root);
7978                         ret = btrfs_commit_transaction(trans, root);
7979                         if (ret)
7980                                 goto repair_abort;
7981                 }
7982                 if (err)
7983                         fprintf(stderr, "repaired damaged extent references\n");
7984                 return ret;
7985         }
7986         return err;
7987 }
7988
7989 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7990 {
7991         u64 stripe_size;
7992
7993         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7994                 stripe_size = length;
7995                 stripe_size /= num_stripes;
7996         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7997                 stripe_size = length * 2;
7998                 stripe_size /= num_stripes;
7999         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8000                 stripe_size = length;
8001                 stripe_size /= (num_stripes - 1);
8002         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8003                 stripe_size = length;
8004                 stripe_size /= (num_stripes - 2);
8005         } else {
8006                 stripe_size = length;
8007         }
8008         return stripe_size;
8009 }
8010
8011 /*
8012  * Check the chunk with its block group/dev list ref:
8013  * Return 0 if all refs seems valid.
8014  * Return 1 if part of refs seems valid, need later check for rebuild ref
8015  * like missing block group and needs to search extent tree to rebuild them.
8016  * Return -1 if essential refs are missing and unable to rebuild.
8017  */
8018 static int check_chunk_refs(struct chunk_record *chunk_rec,
8019                             struct block_group_tree *block_group_cache,
8020                             struct device_extent_tree *dev_extent_cache,
8021                             int silent)
8022 {
8023         struct cache_extent *block_group_item;
8024         struct block_group_record *block_group_rec;
8025         struct cache_extent *dev_extent_item;
8026         struct device_extent_record *dev_extent_rec;
8027         u64 devid;
8028         u64 offset;
8029         u64 length;
8030         int metadump_v2 = 0;
8031         int i;
8032         int ret = 0;
8033
8034         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8035                                                chunk_rec->offset,
8036                                                chunk_rec->length);
8037         if (block_group_item) {
8038                 block_group_rec = container_of(block_group_item,
8039                                                struct block_group_record,
8040                                                cache);
8041                 if (chunk_rec->length != block_group_rec->offset ||
8042                     chunk_rec->offset != block_group_rec->objectid ||
8043                     (!metadump_v2 &&
8044                      chunk_rec->type_flags != block_group_rec->flags)) {
8045                         if (!silent)
8046                                 fprintf(stderr,
8047                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8048                                         chunk_rec->objectid,
8049                                         chunk_rec->type,
8050                                         chunk_rec->offset,
8051                                         chunk_rec->length,
8052                                         chunk_rec->offset,
8053                                         chunk_rec->type_flags,
8054                                         block_group_rec->objectid,
8055                                         block_group_rec->type,
8056                                         block_group_rec->offset,
8057                                         block_group_rec->offset,
8058                                         block_group_rec->objectid,
8059                                         block_group_rec->flags);
8060                         ret = -1;
8061                 } else {
8062                         list_del_init(&block_group_rec->list);
8063                         chunk_rec->bg_rec = block_group_rec;
8064                 }
8065         } else {
8066                 if (!silent)
8067                         fprintf(stderr,
8068                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8069                                 chunk_rec->objectid,
8070                                 chunk_rec->type,
8071                                 chunk_rec->offset,
8072                                 chunk_rec->length,
8073                                 chunk_rec->offset,
8074                                 chunk_rec->type_flags);
8075                 ret = 1;
8076         }
8077
8078         if (metadump_v2)
8079                 return ret;
8080
8081         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8082                                     chunk_rec->num_stripes);
8083         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8084                 devid = chunk_rec->stripes[i].devid;
8085                 offset = chunk_rec->stripes[i].offset;
8086                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8087                                                        devid, offset, length);
8088                 if (dev_extent_item) {
8089                         dev_extent_rec = container_of(dev_extent_item,
8090                                                 struct device_extent_record,
8091                                                 cache);
8092                         if (dev_extent_rec->objectid != devid ||
8093                             dev_extent_rec->offset != offset ||
8094                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8095                             dev_extent_rec->length != length) {
8096                                 if (!silent)
8097                                         fprintf(stderr,
8098                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8099                                                 chunk_rec->objectid,
8100                                                 chunk_rec->type,
8101                                                 chunk_rec->offset,
8102                                                 chunk_rec->stripes[i].devid,
8103                                                 chunk_rec->stripes[i].offset,
8104                                                 dev_extent_rec->objectid,
8105                                                 dev_extent_rec->offset,
8106                                                 dev_extent_rec->length);
8107                                 ret = -1;
8108                         } else {
8109                                 list_move(&dev_extent_rec->chunk_list,
8110                                           &chunk_rec->dextents);
8111                         }
8112                 } else {
8113                         if (!silent)
8114                                 fprintf(stderr,
8115                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8116                                         chunk_rec->objectid,
8117                                         chunk_rec->type,
8118                                         chunk_rec->offset,
8119                                         chunk_rec->stripes[i].devid,
8120                                         chunk_rec->stripes[i].offset);
8121                         ret = -1;
8122                 }
8123         }
8124         return ret;
8125 }
8126
8127 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8128 int check_chunks(struct cache_tree *chunk_cache,
8129                  struct block_group_tree *block_group_cache,
8130                  struct device_extent_tree *dev_extent_cache,
8131                  struct list_head *good, struct list_head *bad,
8132                  struct list_head *rebuild, int silent)
8133 {
8134         struct cache_extent *chunk_item;
8135         struct chunk_record *chunk_rec;
8136         struct block_group_record *bg_rec;
8137         struct device_extent_record *dext_rec;
8138         int err;
8139         int ret = 0;
8140
8141         chunk_item = first_cache_extent(chunk_cache);
8142         while (chunk_item) {
8143                 chunk_rec = container_of(chunk_item, struct chunk_record,
8144                                          cache);
8145                 err = check_chunk_refs(chunk_rec, block_group_cache,
8146                                        dev_extent_cache, silent);
8147                 if (err < 0)
8148                         ret = err;
8149                 if (err == 0 && good)
8150                         list_add_tail(&chunk_rec->list, good);
8151                 if (err > 0 && rebuild)
8152                         list_add_tail(&chunk_rec->list, rebuild);
8153                 if (err < 0 && bad)
8154                         list_add_tail(&chunk_rec->list, bad);
8155                 chunk_item = next_cache_extent(chunk_item);
8156         }
8157
8158         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8159                 if (!silent)
8160                         fprintf(stderr,
8161                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8162                                 bg_rec->objectid,
8163                                 bg_rec->offset,
8164                                 bg_rec->flags);
8165                 if (!ret)
8166                         ret = 1;
8167         }
8168
8169         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8170                             chunk_list) {
8171                 if (!silent)
8172                         fprintf(stderr,
8173                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8174                                 dext_rec->objectid,
8175                                 dext_rec->offset,
8176                                 dext_rec->length);
8177                 if (!ret)
8178                         ret = 1;
8179         }
8180         return ret;
8181 }
8182
8183
8184 static int check_device_used(struct device_record *dev_rec,
8185                              struct device_extent_tree *dext_cache)
8186 {
8187         struct cache_extent *cache;
8188         struct device_extent_record *dev_extent_rec;
8189         u64 total_byte = 0;
8190
8191         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8192         while (cache) {
8193                 dev_extent_rec = container_of(cache,
8194                                               struct device_extent_record,
8195                                               cache);
8196                 if (dev_extent_rec->objectid != dev_rec->devid)
8197                         break;
8198
8199                 list_del_init(&dev_extent_rec->device_list);
8200                 total_byte += dev_extent_rec->length;
8201                 cache = next_cache_extent(cache);
8202         }
8203
8204         if (total_byte != dev_rec->byte_used) {
8205                 fprintf(stderr,
8206                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8207                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8208                         dev_rec->type, dev_rec->offset);
8209                 return -1;
8210         } else {
8211                 return 0;
8212         }
8213 }
8214
8215 /* check btrfs_dev_item -> btrfs_dev_extent */
8216 static int check_devices(struct rb_root *dev_cache,
8217                          struct device_extent_tree *dev_extent_cache)
8218 {
8219         struct rb_node *dev_node;
8220         struct device_record *dev_rec;
8221         struct device_extent_record *dext_rec;
8222         int err;
8223         int ret = 0;
8224
8225         dev_node = rb_first(dev_cache);
8226         while (dev_node) {
8227                 dev_rec = container_of(dev_node, struct device_record, node);
8228                 err = check_device_used(dev_rec, dev_extent_cache);
8229                 if (err)
8230                         ret = err;
8231
8232                 dev_node = rb_next(dev_node);
8233         }
8234         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8235                             device_list) {
8236                 fprintf(stderr,
8237                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8238                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8239                 if (!ret)
8240                         ret = 1;
8241         }
8242         return ret;
8243 }
8244
8245 static int add_root_item_to_list(struct list_head *head,
8246                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8247                                   u8 level, u8 drop_level,
8248                                   int level_size, struct btrfs_key *drop_key)
8249 {
8250
8251         struct root_item_record *ri_rec;
8252         ri_rec = malloc(sizeof(*ri_rec));
8253         if (!ri_rec)
8254                 return -ENOMEM;
8255         ri_rec->bytenr = bytenr;
8256         ri_rec->objectid = objectid;
8257         ri_rec->level = level;
8258         ri_rec->level_size = level_size;
8259         ri_rec->drop_level = drop_level;
8260         ri_rec->last_snapshot = last_snapshot;
8261         if (drop_key)
8262                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8263         list_add_tail(&ri_rec->list, head);
8264
8265         return 0;
8266 }
8267
8268 static void free_root_item_list(struct list_head *list)
8269 {
8270         struct root_item_record *ri_rec;
8271
8272         while (!list_empty(list)) {
8273                 ri_rec = list_first_entry(list, struct root_item_record,
8274                                           list);
8275                 list_del_init(&ri_rec->list);
8276                 free(ri_rec);
8277         }
8278 }
8279
8280 static int deal_root_from_list(struct list_head *list,
8281                                struct btrfs_root *root,
8282                                struct block_info *bits,
8283                                int bits_nr,
8284                                struct cache_tree *pending,
8285                                struct cache_tree *seen,
8286                                struct cache_tree *reada,
8287                                struct cache_tree *nodes,
8288                                struct cache_tree *extent_cache,
8289                                struct cache_tree *chunk_cache,
8290                                struct rb_root *dev_cache,
8291                                struct block_group_tree *block_group_cache,
8292                                struct device_extent_tree *dev_extent_cache)
8293 {
8294         int ret = 0;
8295         u64 last;
8296
8297         while (!list_empty(list)) {
8298                 struct root_item_record *rec;
8299                 struct extent_buffer *buf;
8300                 rec = list_entry(list->next,
8301                                  struct root_item_record, list);
8302                 last = 0;
8303                 buf = read_tree_block(root->fs_info->tree_root,
8304                                       rec->bytenr, rec->level_size, 0);
8305                 if (!extent_buffer_uptodate(buf)) {
8306                         free_extent_buffer(buf);
8307                         ret = -EIO;
8308                         break;
8309                 }
8310                 add_root_to_pending(buf, extent_cache, pending,
8311                                     seen, nodes, rec->objectid);
8312                 /*
8313                  * To rebuild extent tree, we need deal with snapshot
8314                  * one by one, otherwise we deal with node firstly which
8315                  * can maximize readahead.
8316                  */
8317                 while (1) {
8318                         ret = run_next_block(root, bits, bits_nr, &last,
8319                                              pending, seen, reada, nodes,
8320                                              extent_cache, chunk_cache,
8321                                              dev_cache, block_group_cache,
8322                                              dev_extent_cache, rec);
8323                         if (ret != 0)
8324                                 break;
8325                 }
8326                 free_extent_buffer(buf);
8327                 list_del(&rec->list);
8328                 free(rec);
8329                 if (ret < 0)
8330                         break;
8331         }
8332         while (ret >= 0) {
8333                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8334                                      reada, nodes, extent_cache, chunk_cache,
8335                                      dev_cache, block_group_cache,
8336                                      dev_extent_cache, NULL);
8337                 if (ret != 0) {
8338                         if (ret > 0)
8339                                 ret = 0;
8340                         break;
8341                 }
8342         }
8343         return ret;
8344 }
8345
8346 static int check_chunks_and_extents(struct btrfs_root *root)
8347 {
8348         struct rb_root dev_cache;
8349         struct cache_tree chunk_cache;
8350         struct block_group_tree block_group_cache;
8351         struct device_extent_tree dev_extent_cache;
8352         struct cache_tree extent_cache;
8353         struct cache_tree seen;
8354         struct cache_tree pending;
8355         struct cache_tree reada;
8356         struct cache_tree nodes;
8357         struct extent_io_tree excluded_extents;
8358         struct cache_tree corrupt_blocks;
8359         struct btrfs_path path;
8360         struct btrfs_key key;
8361         struct btrfs_key found_key;
8362         int ret, err = 0;
8363         struct block_info *bits;
8364         int bits_nr;
8365         struct extent_buffer *leaf;
8366         int slot;
8367         struct btrfs_root_item ri;
8368         struct list_head dropping_trees;
8369         struct list_head normal_trees;
8370         struct btrfs_root *root1;
8371         u64 objectid;
8372         u32 level_size;
8373         u8 level;
8374
8375         dev_cache = RB_ROOT;
8376         cache_tree_init(&chunk_cache);
8377         block_group_tree_init(&block_group_cache);
8378         device_extent_tree_init(&dev_extent_cache);
8379
8380         cache_tree_init(&extent_cache);
8381         cache_tree_init(&seen);
8382         cache_tree_init(&pending);
8383         cache_tree_init(&nodes);
8384         cache_tree_init(&reada);
8385         cache_tree_init(&corrupt_blocks);
8386         extent_io_tree_init(&excluded_extents);
8387         INIT_LIST_HEAD(&dropping_trees);
8388         INIT_LIST_HEAD(&normal_trees);
8389
8390         if (repair) {
8391                 root->fs_info->excluded_extents = &excluded_extents;
8392                 root->fs_info->fsck_extent_cache = &extent_cache;
8393                 root->fs_info->free_extent_hook = free_extent_hook;
8394                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8395         }
8396
8397         bits_nr = 1024;
8398         bits = malloc(bits_nr * sizeof(struct block_info));
8399         if (!bits) {
8400                 perror("malloc");
8401                 exit(1);
8402         }
8403
8404         if (ctx.progress_enabled) {
8405                 ctx.tp = TASK_EXTENTS;
8406                 task_start(ctx.info);
8407         }
8408
8409 again:
8410         root1 = root->fs_info->tree_root;
8411         level = btrfs_header_level(root1->node);
8412         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8413                                     root1->node->start, 0, level, 0,
8414                                     root1->nodesize, NULL);
8415         if (ret < 0)
8416                 goto out;
8417         root1 = root->fs_info->chunk_root;
8418         level = btrfs_header_level(root1->node);
8419         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8420                                     root1->node->start, 0, level, 0,
8421                                     root1->nodesize, NULL);
8422         if (ret < 0)
8423                 goto out;
8424         btrfs_init_path(&path);
8425         key.offset = 0;
8426         key.objectid = 0;
8427         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8428         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8429                                         &key, &path, 0, 0);
8430         if (ret < 0)
8431                 goto out;
8432         while(1) {
8433                 leaf = path.nodes[0];
8434                 slot = path.slots[0];
8435                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8436                         ret = btrfs_next_leaf(root, &path);
8437                         if (ret != 0)
8438                                 break;
8439                         leaf = path.nodes[0];
8440                         slot = path.slots[0];
8441                 }
8442                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8443                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8444                         unsigned long offset;
8445                         u64 last_snapshot;
8446
8447                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8448                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8449                         last_snapshot = btrfs_root_last_snapshot(&ri);
8450                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8451                                 level = btrfs_root_level(&ri);
8452                                 level_size = root->nodesize;
8453                                 ret = add_root_item_to_list(&normal_trees,
8454                                                 found_key.objectid,
8455                                                 btrfs_root_bytenr(&ri),
8456                                                 last_snapshot, level,
8457                                                 0, level_size, NULL);
8458                                 if (ret < 0)
8459                                         goto out;
8460                         } else {
8461                                 level = btrfs_root_level(&ri);
8462                                 level_size = root->nodesize;
8463                                 objectid = found_key.objectid;
8464                                 btrfs_disk_key_to_cpu(&found_key,
8465                                                       &ri.drop_progress);
8466                                 ret = add_root_item_to_list(&dropping_trees,
8467                                                 objectid,
8468                                                 btrfs_root_bytenr(&ri),
8469                                                 last_snapshot, level,
8470                                                 ri.drop_level,
8471                                                 level_size, &found_key);
8472                                 if (ret < 0)
8473                                         goto out;
8474                         }
8475                 }
8476                 path.slots[0]++;
8477         }
8478         btrfs_release_path(&path);
8479
8480         /*
8481          * check_block can return -EAGAIN if it fixes something, please keep
8482          * this in mind when dealing with return values from these functions, if
8483          * we get -EAGAIN we want to fall through and restart the loop.
8484          */
8485         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8486                                   &seen, &reada, &nodes, &extent_cache,
8487                                   &chunk_cache, &dev_cache, &block_group_cache,
8488                                   &dev_extent_cache);
8489         if (ret < 0) {
8490                 if (ret == -EAGAIN)
8491                         goto loop;
8492                 goto out;
8493         }
8494         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8495                                   &pending, &seen, &reada, &nodes,
8496                                   &extent_cache, &chunk_cache, &dev_cache,
8497                                   &block_group_cache, &dev_extent_cache);
8498         if (ret < 0) {
8499                 if (ret == -EAGAIN)
8500                         goto loop;
8501                 goto out;
8502         }
8503
8504         ret = check_chunks(&chunk_cache, &block_group_cache,
8505                            &dev_extent_cache, NULL, NULL, NULL, 0);
8506         if (ret) {
8507                 if (ret == -EAGAIN)
8508                         goto loop;
8509                 err = ret;
8510         }
8511
8512         ret = check_extent_refs(root, &extent_cache);
8513         if (ret < 0) {
8514                 if (ret == -EAGAIN)
8515                         goto loop;
8516                 goto out;
8517         }
8518
8519         ret = check_devices(&dev_cache, &dev_extent_cache);
8520         if (ret && err)
8521                 ret = err;
8522
8523 out:
8524         task_stop(ctx.info);
8525         if (repair) {
8526                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8527                 extent_io_tree_cleanup(&excluded_extents);
8528                 root->fs_info->fsck_extent_cache = NULL;
8529                 root->fs_info->free_extent_hook = NULL;
8530                 root->fs_info->corrupt_blocks = NULL;
8531                 root->fs_info->excluded_extents = NULL;
8532         }
8533         free(bits);
8534         free_chunk_cache_tree(&chunk_cache);
8535         free_device_cache_tree(&dev_cache);
8536         free_block_group_tree(&block_group_cache);
8537         free_device_extent_tree(&dev_extent_cache);
8538         free_extent_cache_tree(&seen);
8539         free_extent_cache_tree(&pending);
8540         free_extent_cache_tree(&reada);
8541         free_extent_cache_tree(&nodes);
8542         return ret;
8543 loop:
8544         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8545         free_extent_cache_tree(&seen);
8546         free_extent_cache_tree(&pending);
8547         free_extent_cache_tree(&reada);
8548         free_extent_cache_tree(&nodes);
8549         free_chunk_cache_tree(&chunk_cache);
8550         free_block_group_tree(&block_group_cache);
8551         free_device_cache_tree(&dev_cache);
8552         free_device_extent_tree(&dev_extent_cache);
8553         free_extent_record_cache(root->fs_info, &extent_cache);
8554         free_root_item_list(&normal_trees);
8555         free_root_item_list(&dropping_trees);
8556         extent_io_tree_cleanup(&excluded_extents);
8557         goto again;
8558 }
8559
8560 /*
8561  * Check backrefs of a tree block given by @bytenr or @eb.
8562  *
8563  * @root:       the root containing the @bytenr or @eb
8564  * @eb:         tree block extent buffer, can be NULL
8565  * @bytenr:     bytenr of the tree block to search
8566  * @level:      tree level of the tree block
8567  * @owner:      owner of the tree block
8568  *
8569  * Return >0 for any error found and output error message
8570  * Return 0 for no error found
8571  */
8572 static int check_tree_block_ref(struct btrfs_root *root,
8573                                 struct extent_buffer *eb, u64 bytenr,
8574                                 int level, u64 owner)
8575 {
8576         struct btrfs_key key;
8577         struct btrfs_root *extent_root = root->fs_info->extent_root;
8578         struct btrfs_path path;
8579         struct btrfs_extent_item *ei;
8580         struct btrfs_extent_inline_ref *iref;
8581         struct extent_buffer *leaf;
8582         unsigned long end;
8583         unsigned long ptr;
8584         int slot;
8585         int skinny_level;
8586         int type;
8587         u32 nodesize = root->nodesize;
8588         u32 item_size;
8589         u64 offset;
8590         int found_ref = 0;
8591         int err = 0;
8592         int ret;
8593
8594         btrfs_init_path(&path);
8595         key.objectid = bytenr;
8596         if (btrfs_fs_incompat(root->fs_info,
8597                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8598                 key.type = BTRFS_METADATA_ITEM_KEY;
8599         else
8600                 key.type = BTRFS_EXTENT_ITEM_KEY;
8601         key.offset = (u64)-1;
8602
8603         /* Search for the backref in extent tree */
8604         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8605         if (ret < 0) {
8606                 err |= BACKREF_MISSING;
8607                 goto out;
8608         }
8609         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8610         if (ret) {
8611                 err |= BACKREF_MISSING;
8612                 goto out;
8613         }
8614
8615         leaf = path.nodes[0];
8616         slot = path.slots[0];
8617         btrfs_item_key_to_cpu(leaf, &key, slot);
8618
8619         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8620
8621         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8622                 skinny_level = (int)key.offset;
8623                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8624         } else {
8625                 struct btrfs_tree_block_info *info;
8626
8627                 info = (struct btrfs_tree_block_info *)(ei + 1);
8628                 skinny_level = btrfs_tree_block_level(leaf, info);
8629                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8630         }
8631
8632         if (eb) {
8633                 u64 header_gen;
8634                 u64 extent_gen;
8635
8636                 if (!(btrfs_extent_flags(leaf, ei) &
8637                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8638                         error(
8639                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8640                                 key.objectid, nodesize,
8641                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8642                         err = BACKREF_MISMATCH;
8643                 }
8644                 header_gen = btrfs_header_generation(eb);
8645                 extent_gen = btrfs_extent_generation(leaf, ei);
8646                 if (header_gen != extent_gen) {
8647                         error(
8648         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8649                                 key.objectid, nodesize, header_gen,
8650                                 extent_gen);
8651                         err = BACKREF_MISMATCH;
8652                 }
8653                 if (level != skinny_level) {
8654                         error(
8655                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8656                                 key.objectid, nodesize, level, skinny_level);
8657                         err = BACKREF_MISMATCH;
8658                 }
8659                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8660                         error(
8661                         "extent[%llu %u] is referred by other roots than %llu",
8662                                 key.objectid, nodesize, root->objectid);
8663                         err = BACKREF_MISMATCH;
8664                 }
8665         }
8666
8667         /*
8668          * Iterate the extent/metadata item to find the exact backref
8669          */
8670         item_size = btrfs_item_size_nr(leaf, slot);
8671         ptr = (unsigned long)iref;
8672         end = (unsigned long)ei + item_size;
8673         while (ptr < end) {
8674                 iref = (struct btrfs_extent_inline_ref *)ptr;
8675                 type = btrfs_extent_inline_ref_type(leaf, iref);
8676                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8677
8678                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8679                         (offset == root->objectid || offset == owner)) {
8680                         found_ref = 1;
8681                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8682                         /* Check if the backref points to valid referencer */
8683                         found_ref = !check_tree_block_ref(root, NULL, offset,
8684                                                           level + 1, owner);
8685                 }
8686
8687                 if (found_ref)
8688                         break;
8689                 ptr += btrfs_extent_inline_ref_size(type);
8690         }
8691
8692         /*
8693          * Inlined extent item doesn't have what we need, check
8694          * TREE_BLOCK_REF_KEY
8695          */
8696         if (!found_ref) {
8697                 btrfs_release_path(&path);
8698                 key.objectid = bytenr;
8699                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8700                 key.offset = root->objectid;
8701
8702                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8703                 if (!ret)
8704                         found_ref = 1;
8705         }
8706         if (!found_ref)
8707                 err |= BACKREF_MISSING;
8708 out:
8709         btrfs_release_path(&path);
8710         if (eb && (err & BACKREF_MISSING))
8711                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8712                         bytenr, nodesize, owner, level);
8713         return err;
8714 }
8715
8716 /*
8717  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8718  *
8719  * Return >0 any error found and output error message
8720  * Return 0 for no error found
8721  */
8722 static int check_extent_data_item(struct btrfs_root *root,
8723                                   struct extent_buffer *eb, int slot)
8724 {
8725         struct btrfs_file_extent_item *fi;
8726         struct btrfs_path path;
8727         struct btrfs_root *extent_root = root->fs_info->extent_root;
8728         struct btrfs_key fi_key;
8729         struct btrfs_key dbref_key;
8730         struct extent_buffer *leaf;
8731         struct btrfs_extent_item *ei;
8732         struct btrfs_extent_inline_ref *iref;
8733         struct btrfs_extent_data_ref *dref;
8734         u64 owner;
8735         u64 file_extent_gen;
8736         u64 disk_bytenr;
8737         u64 disk_num_bytes;
8738         u64 extent_num_bytes;
8739         u64 extent_flags;
8740         u64 extent_gen;
8741         u32 item_size;
8742         unsigned long end;
8743         unsigned long ptr;
8744         int type;
8745         u64 ref_root;
8746         int found_dbackref = 0;
8747         int err = 0;
8748         int ret;
8749
8750         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8751         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8752         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8753
8754         /* Nothing to check for hole and inline data extents */
8755         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8756             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8757                 return 0;
8758
8759         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8760         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8761         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8762
8763         /* Check unaligned disk_num_bytes and num_bytes */
8764         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8765                 error(
8766 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8767                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8768                         root->sectorsize);
8769                 err |= BYTES_UNALIGNED;
8770         } else {
8771                 data_bytes_allocated += disk_num_bytes;
8772         }
8773         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8774                 error(
8775 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8776                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8777                         root->sectorsize);
8778                 err |= BYTES_UNALIGNED;
8779         } else {
8780                 data_bytes_referenced += extent_num_bytes;
8781         }
8782         owner = btrfs_header_owner(eb);
8783
8784         /* Check the extent item of the file extent in extent tree */
8785         btrfs_init_path(&path);
8786         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8787         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8788         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8789
8790         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8791         if (ret) {
8792                 err |= BACKREF_MISSING;
8793                 goto error;
8794         }
8795
8796         leaf = path.nodes[0];
8797         slot = path.slots[0];
8798         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8799
8800         extent_flags = btrfs_extent_flags(leaf, ei);
8801         extent_gen = btrfs_extent_generation(leaf, ei);
8802
8803         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8804                 error(
8805                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8806                     disk_bytenr, disk_num_bytes,
8807                     BTRFS_EXTENT_FLAG_DATA);
8808                 err |= BACKREF_MISMATCH;
8809         }
8810
8811         if (file_extent_gen < extent_gen) {
8812                 error(
8813 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8814                         disk_bytenr, disk_num_bytes, file_extent_gen,
8815                         extent_gen);
8816                 err |= BACKREF_MISMATCH;
8817         }
8818
8819         /* Check data backref inside that extent item */
8820         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8821         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8822         ptr = (unsigned long)iref;
8823         end = (unsigned long)ei + item_size;
8824         while (ptr < end) {
8825                 iref = (struct btrfs_extent_inline_ref *)ptr;
8826                 type = btrfs_extent_inline_ref_type(leaf, iref);
8827                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8828
8829                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8830                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8831                         if (ref_root == owner || ref_root == root->objectid)
8832                                 found_dbackref = 1;
8833                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8834                         found_dbackref = !check_tree_block_ref(root, NULL,
8835                                 btrfs_extent_inline_ref_offset(leaf, iref),
8836                                 0, owner);
8837                 }
8838
8839                 if (found_dbackref)
8840                         break;
8841                 ptr += btrfs_extent_inline_ref_size(type);
8842         }
8843
8844         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8845         if (!found_dbackref) {
8846                 btrfs_release_path(&path);
8847
8848                 btrfs_init_path(&path);
8849                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8850                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8851                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8852                                 fi_key.objectid, fi_key.offset);
8853
8854                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8855                                         &dbref_key, &path, 0, 0);
8856                 if (!ret)
8857                         found_dbackref = 1;
8858         }
8859
8860         if (!found_dbackref)
8861                 err |= BACKREF_MISSING;
8862 error:
8863         btrfs_release_path(&path);
8864         if (err & BACKREF_MISSING) {
8865                 error("data extent[%llu %llu] backref lost",
8866                       disk_bytenr, disk_num_bytes);
8867         }
8868         return err;
8869 }
8870
8871 /*
8872  * Get real tree block level for the case like shared block
8873  * Return >= 0 as tree level
8874  * Return <0 for error
8875  */
8876 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8877 {
8878         struct extent_buffer *eb;
8879         struct btrfs_path path;
8880         struct btrfs_key key;
8881         struct btrfs_extent_item *ei;
8882         u64 flags;
8883         u64 transid;
8884         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8885         u8 backref_level;
8886         u8 header_level;
8887         int ret;
8888
8889         /* Search extent tree for extent generation and level */
8890         key.objectid = bytenr;
8891         key.type = BTRFS_METADATA_ITEM_KEY;
8892         key.offset = (u64)-1;
8893
8894         btrfs_init_path(&path);
8895         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8896         if (ret < 0)
8897                 goto release_out;
8898         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8899         if (ret < 0)
8900                 goto release_out;
8901         if (ret > 0) {
8902                 ret = -ENOENT;
8903                 goto release_out;
8904         }
8905
8906         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8907         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8908                             struct btrfs_extent_item);
8909         flags = btrfs_extent_flags(path.nodes[0], ei);
8910         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8911                 ret = -ENOENT;
8912                 goto release_out;
8913         }
8914
8915         /* Get transid for later read_tree_block() check */
8916         transid = btrfs_extent_generation(path.nodes[0], ei);
8917
8918         /* Get backref level as one source */
8919         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8920                 backref_level = key.offset;
8921         } else {
8922                 struct btrfs_tree_block_info *info;
8923
8924                 info = (struct btrfs_tree_block_info *)(ei + 1);
8925                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8926         }
8927         btrfs_release_path(&path);
8928
8929         /* Get level from tree block as an alternative source */
8930         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8931         if (!extent_buffer_uptodate(eb)) {
8932                 free_extent_buffer(eb);
8933                 return -EIO;
8934         }
8935         header_level = btrfs_header_level(eb);
8936         free_extent_buffer(eb);
8937
8938         if (header_level != backref_level)
8939                 return -EIO;
8940         return header_level;
8941
8942 release_out:
8943         btrfs_release_path(&path);
8944         return ret;
8945 }
8946
8947 /*
8948  * Check if a tree block backref is valid (points to a valid tree block)
8949  * if level == -1, level will be resolved
8950  * Return >0 for any error found and print error message
8951  */
8952 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8953                                     u64 bytenr, int level)
8954 {
8955         struct btrfs_root *root;
8956         struct btrfs_key key;
8957         struct btrfs_path path;
8958         struct extent_buffer *eb;
8959         struct extent_buffer *node;
8960         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8961         int err = 0;
8962         int ret;
8963
8964         /* Query level for level == -1 special case */
8965         if (level == -1)
8966                 level = query_tree_block_level(fs_info, bytenr);
8967         if (level < 0) {
8968                 err |= REFERENCER_MISSING;
8969                 goto out;
8970         }
8971
8972         key.objectid = root_id;
8973         key.type = BTRFS_ROOT_ITEM_KEY;
8974         key.offset = (u64)-1;
8975
8976         root = btrfs_read_fs_root(fs_info, &key);
8977         if (IS_ERR(root)) {
8978                 err |= REFERENCER_MISSING;
8979                 goto out;
8980         }
8981
8982         /* Read out the tree block to get item/node key */
8983         eb = read_tree_block(root, bytenr, root->nodesize, 0);
8984         if (!extent_buffer_uptodate(eb)) {
8985                 err |= REFERENCER_MISSING;
8986                 free_extent_buffer(eb);
8987                 goto out;
8988         }
8989
8990         /* Empty tree, no need to check key */
8991         if (!btrfs_header_nritems(eb) && !level) {
8992                 free_extent_buffer(eb);
8993                 goto out;
8994         }
8995
8996         if (level)
8997                 btrfs_node_key_to_cpu(eb, &key, 0);
8998         else
8999                 btrfs_item_key_to_cpu(eb, &key, 0);
9000
9001         free_extent_buffer(eb);
9002
9003         btrfs_init_path(&path);
9004         /* Search with the first key, to ensure we can reach it */
9005         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9006         if (ret) {
9007                 err |= REFERENCER_MISSING;
9008                 goto release_out;
9009         }
9010
9011         node = path.nodes[level];
9012         if (btrfs_header_bytenr(node) != bytenr) {
9013                 error(
9014         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9015                         bytenr, nodesize, bytenr,
9016                         btrfs_header_bytenr(node));
9017                 err |= REFERENCER_MISMATCH;
9018         }
9019         if (btrfs_header_level(node) != level) {
9020                 error(
9021         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9022                         bytenr, nodesize, level,
9023                         btrfs_header_level(node));
9024                 err |= REFERENCER_MISMATCH;
9025         }
9026
9027 release_out:
9028         btrfs_release_path(&path);
9029 out:
9030         if (err & REFERENCER_MISSING) {
9031                 if (level < 0)
9032                         error("extent [%llu %d] lost referencer (owner: %llu)",
9033                                 bytenr, nodesize, root_id);
9034                 else
9035                         error(
9036                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9037                                 bytenr, nodesize, root_id, level);
9038         }
9039
9040         return err;
9041 }
9042
9043 /*
9044  * Check referencer for shared block backref
9045  * If level == -1, this function will resolve the level.
9046  */
9047 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9048                                      u64 parent, u64 bytenr, int level)
9049 {
9050         struct extent_buffer *eb;
9051         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9052         u32 nr;
9053         int found_parent = 0;
9054         int i;
9055
9056         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9057         if (!extent_buffer_uptodate(eb))
9058                 goto out;
9059
9060         if (level == -1)
9061                 level = query_tree_block_level(fs_info, bytenr);
9062         if (level < 0)
9063                 goto out;
9064
9065         if (level + 1 != btrfs_header_level(eb))
9066                 goto out;
9067
9068         nr = btrfs_header_nritems(eb);
9069         for (i = 0; i < nr; i++) {
9070                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9071                         found_parent = 1;
9072                         break;
9073                 }
9074         }
9075 out:
9076         free_extent_buffer(eb);
9077         if (!found_parent) {
9078                 error(
9079         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9080                         bytenr, nodesize, parent, level);
9081                 return REFERENCER_MISSING;
9082         }
9083         return 0;
9084 }
9085
9086 /*
9087  * Check referencer for normal (inlined) data ref
9088  * If len == 0, it will be resolved by searching in extent tree
9089  */
9090 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9091                                      u64 root_id, u64 objectid, u64 offset,
9092                                      u64 bytenr, u64 len, u32 count)
9093 {
9094         struct btrfs_root *root;
9095         struct btrfs_root *extent_root = fs_info->extent_root;
9096         struct btrfs_key key;
9097         struct btrfs_path path;
9098         struct extent_buffer *leaf;
9099         struct btrfs_file_extent_item *fi;
9100         u32 found_count = 0;
9101         int slot;
9102         int ret = 0;
9103
9104         if (!len) {
9105                 key.objectid = bytenr;
9106                 key.type = BTRFS_EXTENT_ITEM_KEY;
9107                 key.offset = (u64)-1;
9108
9109                 btrfs_init_path(&path);
9110                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9111                 if (ret < 0)
9112                         goto out;
9113                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9114                 if (ret)
9115                         goto out;
9116                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9117                 if (key.objectid != bytenr ||
9118                     key.type != BTRFS_EXTENT_ITEM_KEY)
9119                         goto out;
9120                 len = key.offset;
9121                 btrfs_release_path(&path);
9122         }
9123         key.objectid = root_id;
9124         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9125         key.offset = (u64)-1;
9126         btrfs_init_path(&path);
9127
9128         root = btrfs_read_fs_root(fs_info, &key);
9129         if (IS_ERR(root))
9130                 goto out;
9131
9132         key.objectid = objectid;
9133         key.type = BTRFS_EXTENT_DATA_KEY;
9134         /*
9135          * It can be nasty as data backref offset is
9136          * file offset - file extent offset, which is smaller or
9137          * equal to original backref offset.  The only special case is
9138          * overflow.  So we need to special check and do further search.
9139          */
9140         key.offset = offset & (1ULL << 63) ? 0 : offset;
9141
9142         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9143         if (ret < 0)
9144                 goto out;
9145
9146         /*
9147          * Search afterwards to get correct one
9148          * NOTE: As we must do a comprehensive check on the data backref to
9149          * make sure the dref count also matches, we must iterate all file
9150          * extents for that inode.
9151          */
9152         while (1) {
9153                 leaf = path.nodes[0];
9154                 slot = path.slots[0];
9155
9156                 btrfs_item_key_to_cpu(leaf, &key, slot);
9157                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9158                         break;
9159                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9160                 /*
9161                  * Except normal disk bytenr and disk num bytes, we still
9162                  * need to do extra check on dbackref offset as
9163                  * dbackref offset = file_offset - file_extent_offset
9164                  */
9165                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9166                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9167                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9168                     offset)
9169                         found_count++;
9170
9171                 ret = btrfs_next_item(root, &path);
9172                 if (ret)
9173                         break;
9174         }
9175 out:
9176         btrfs_release_path(&path);
9177         if (found_count != count) {
9178                 error(
9179 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9180                         bytenr, len, root_id, objectid, offset, count, found_count);
9181                 return REFERENCER_MISSING;
9182         }
9183         return 0;
9184 }
9185
9186 /*
9187  * Check if the referencer of a shared data backref exists
9188  */
9189 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9190                                      u64 parent, u64 bytenr)
9191 {
9192         struct extent_buffer *eb;
9193         struct btrfs_key key;
9194         struct btrfs_file_extent_item *fi;
9195         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9196         u32 nr;
9197         int found_parent = 0;
9198         int i;
9199
9200         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9201         if (!extent_buffer_uptodate(eb))
9202                 goto out;
9203
9204         nr = btrfs_header_nritems(eb);
9205         for (i = 0; i < nr; i++) {
9206                 btrfs_item_key_to_cpu(eb, &key, i);
9207                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9208                         continue;
9209
9210                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9211                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9212                         continue;
9213
9214                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9215                         found_parent = 1;
9216                         break;
9217                 }
9218         }
9219
9220 out:
9221         free_extent_buffer(eb);
9222         if (!found_parent) {
9223                 error("shared extent %llu referencer lost (parent: %llu)",
9224                         bytenr, parent);
9225                 return REFERENCER_MISSING;
9226         }
9227         return 0;
9228 }
9229
9230 /*
9231  * This function will check a given extent item, including its backref and
9232  * itself (like crossing stripe boundary and type)
9233  *
9234  * Since we don't use extent_record anymore, introduce new error bit
9235  */
9236 static int check_extent_item(struct btrfs_fs_info *fs_info,
9237                              struct extent_buffer *eb, int slot)
9238 {
9239         struct btrfs_extent_item *ei;
9240         struct btrfs_extent_inline_ref *iref;
9241         struct btrfs_extent_data_ref *dref;
9242         unsigned long end;
9243         unsigned long ptr;
9244         int type;
9245         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9246         u32 item_size = btrfs_item_size_nr(eb, slot);
9247         u64 flags;
9248         u64 offset;
9249         int metadata = 0;
9250         int level;
9251         struct btrfs_key key;
9252         int ret;
9253         int err = 0;
9254
9255         btrfs_item_key_to_cpu(eb, &key, slot);
9256         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9257                 bytes_used += key.offset;
9258         else
9259                 bytes_used += nodesize;
9260
9261         if (item_size < sizeof(*ei)) {
9262                 /*
9263                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9264                  * old thing when on disk format is still un-determined.
9265                  * No need to care about it anymore
9266                  */
9267                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9268                 return -ENOTTY;
9269         }
9270
9271         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9272         flags = btrfs_extent_flags(eb, ei);
9273
9274         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9275                 metadata = 1;
9276         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9277                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9278                       key.objectid, key.objectid + nodesize);
9279                 err |= CROSSING_STRIPE_BOUNDARY;
9280         }
9281
9282         ptr = (unsigned long)(ei + 1);
9283
9284         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9285                 /* Old EXTENT_ITEM metadata */
9286                 struct btrfs_tree_block_info *info;
9287
9288                 info = (struct btrfs_tree_block_info *)ptr;
9289                 level = btrfs_tree_block_level(eb, info);
9290                 ptr += sizeof(struct btrfs_tree_block_info);
9291         } else {
9292                 /* New METADATA_ITEM */
9293                 level = key.offset;
9294         }
9295         end = (unsigned long)ei + item_size;
9296
9297         if (ptr >= end) {
9298                 err |= ITEM_SIZE_MISMATCH;
9299                 goto out;
9300         }
9301
9302         /* Now check every backref in this extent item */
9303 next:
9304         iref = (struct btrfs_extent_inline_ref *)ptr;
9305         type = btrfs_extent_inline_ref_type(eb, iref);
9306         offset = btrfs_extent_inline_ref_offset(eb, iref);
9307         switch (type) {
9308         case BTRFS_TREE_BLOCK_REF_KEY:
9309                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9310                                                level);
9311                 err |= ret;
9312                 break;
9313         case BTRFS_SHARED_BLOCK_REF_KEY:
9314                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9315                                                  level);
9316                 err |= ret;
9317                 break;
9318         case BTRFS_EXTENT_DATA_REF_KEY:
9319                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9320                 ret = check_extent_data_backref(fs_info,
9321                                 btrfs_extent_data_ref_root(eb, dref),
9322                                 btrfs_extent_data_ref_objectid(eb, dref),
9323                                 btrfs_extent_data_ref_offset(eb, dref),
9324                                 key.objectid, key.offset,
9325                                 btrfs_extent_data_ref_count(eb, dref));
9326                 err |= ret;
9327                 break;
9328         case BTRFS_SHARED_DATA_REF_KEY:
9329                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9330                 err |= ret;
9331                 break;
9332         default:
9333                 error("extent[%llu %d %llu] has unknown ref type: %d",
9334                         key.objectid, key.type, key.offset, type);
9335                 err |= UNKNOWN_TYPE;
9336                 goto out;
9337         }
9338
9339         ptr += btrfs_extent_inline_ref_size(type);
9340         if (ptr < end)
9341                 goto next;
9342
9343 out:
9344         return err;
9345 }
9346
9347 /*
9348  * Check if a dev extent item is referred correctly by its chunk
9349  */
9350 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9351                                  struct extent_buffer *eb, int slot)
9352 {
9353         struct btrfs_root *chunk_root = fs_info->chunk_root;
9354         struct btrfs_dev_extent *ptr;
9355         struct btrfs_path path;
9356         struct btrfs_key chunk_key;
9357         struct btrfs_key devext_key;
9358         struct btrfs_chunk *chunk;
9359         struct extent_buffer *l;
9360         int num_stripes;
9361         u64 length;
9362         int i;
9363         int found_chunk = 0;
9364         int ret;
9365
9366         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9367         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9368         length = btrfs_dev_extent_length(eb, ptr);
9369
9370         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9371         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9372         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9373
9374         btrfs_init_path(&path);
9375         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9376         if (ret)
9377                 goto out;
9378
9379         l = path.nodes[0];
9380         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9381         if (btrfs_chunk_length(l, chunk) != length)
9382                 goto out;
9383
9384         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9385         for (i = 0; i < num_stripes; i++) {
9386                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9387                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9388
9389                 if (devid == devext_key.objectid &&
9390                     offset == devext_key.offset) {
9391                         found_chunk = 1;
9392                         break;
9393                 }
9394         }
9395 out:
9396         btrfs_release_path(&path);
9397         if (!found_chunk) {
9398                 error(
9399                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9400                         devext_key.objectid, devext_key.offset, length);
9401                 return REFERENCER_MISSING;
9402         }
9403         return 0;
9404 }
9405
9406 /*
9407  * Check if the used space is correct with the dev item
9408  */
9409 static int check_dev_item(struct btrfs_fs_info *fs_info,
9410                           struct extent_buffer *eb, int slot)
9411 {
9412         struct btrfs_root *dev_root = fs_info->dev_root;
9413         struct btrfs_dev_item *dev_item;
9414         struct btrfs_path path;
9415         struct btrfs_key key;
9416         struct btrfs_dev_extent *ptr;
9417         u64 dev_id;
9418         u64 used;
9419         u64 total = 0;
9420         int ret;
9421
9422         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9423         dev_id = btrfs_device_id(eb, dev_item);
9424         used = btrfs_device_bytes_used(eb, dev_item);
9425
9426         key.objectid = dev_id;
9427         key.type = BTRFS_DEV_EXTENT_KEY;
9428         key.offset = 0;
9429
9430         btrfs_init_path(&path);
9431         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9432         if (ret < 0) {
9433                 btrfs_item_key_to_cpu(eb, &key, slot);
9434                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9435                         key.objectid, key.type, key.offset);
9436                 btrfs_release_path(&path);
9437                 return REFERENCER_MISSING;
9438         }
9439
9440         /* Iterate dev_extents to calculate the used space of a device */
9441         while (1) {
9442                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9443
9444                 if (key.objectid > dev_id)
9445                         break;
9446                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9447                         goto next;
9448
9449                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9450                                      struct btrfs_dev_extent);
9451                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9452 next:
9453                 ret = btrfs_next_item(dev_root, &path);
9454                 if (ret)
9455                         break;
9456         }
9457         btrfs_release_path(&path);
9458
9459         if (used != total) {
9460                 btrfs_item_key_to_cpu(eb, &key, slot);
9461                 error(
9462 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9463                         total, used, BTRFS_ROOT_TREE_OBJECTID,
9464                         BTRFS_DEV_EXTENT_KEY, dev_id);
9465                 return ACCOUNTING_MISMATCH;
9466         }
9467         return 0;
9468 }
9469
9470 /*
9471  * Check a block group item with its referener (chunk) and its used space
9472  * with extent/metadata item
9473  */
9474 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9475                                   struct extent_buffer *eb, int slot)
9476 {
9477         struct btrfs_root *extent_root = fs_info->extent_root;
9478         struct btrfs_root *chunk_root = fs_info->chunk_root;
9479         struct btrfs_block_group_item *bi;
9480         struct btrfs_block_group_item bg_item;
9481         struct btrfs_path path;
9482         struct btrfs_key bg_key;
9483         struct btrfs_key chunk_key;
9484         struct btrfs_key extent_key;
9485         struct btrfs_chunk *chunk;
9486         struct extent_buffer *leaf;
9487         struct btrfs_extent_item *ei;
9488         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9489         u64 flags;
9490         u64 bg_flags;
9491         u64 used;
9492         u64 total = 0;
9493         int ret;
9494         int err = 0;
9495
9496         btrfs_item_key_to_cpu(eb, &bg_key, slot);
9497         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9498         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9499         used = btrfs_block_group_used(&bg_item);
9500         bg_flags = btrfs_block_group_flags(&bg_item);
9501
9502         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9503         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9504         chunk_key.offset = bg_key.objectid;
9505
9506         btrfs_init_path(&path);
9507         /* Search for the referencer chunk */
9508         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9509         if (ret) {
9510                 error(
9511                 "block group[%llu %llu] did not find the related chunk item",
9512                         bg_key.objectid, bg_key.offset);
9513                 err |= REFERENCER_MISSING;
9514         } else {
9515                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9516                                         struct btrfs_chunk);
9517                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9518                                                 bg_key.offset) {
9519                         error(
9520         "block group[%llu %llu] related chunk item length does not match",
9521                                 bg_key.objectid, bg_key.offset);
9522                         err |= REFERENCER_MISMATCH;
9523                 }
9524         }
9525         btrfs_release_path(&path);
9526
9527         /* Search from the block group bytenr */
9528         extent_key.objectid = bg_key.objectid;
9529         extent_key.type = 0;
9530         extent_key.offset = 0;
9531
9532         btrfs_init_path(&path);
9533         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9534         if (ret < 0)
9535                 goto out;
9536
9537         /* Iterate extent tree to account used space */
9538         while (1) {
9539                 leaf = path.nodes[0];
9540                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9541                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9542                         break;
9543
9544                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9545                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9546                         goto next;
9547                 if (extent_key.objectid < bg_key.objectid)
9548                         goto next;
9549
9550                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9551                         total += nodesize;
9552                 else
9553                         total += extent_key.offset;
9554
9555                 ei = btrfs_item_ptr(leaf, path.slots[0],
9556                                     struct btrfs_extent_item);
9557                 flags = btrfs_extent_flags(leaf, ei);
9558                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9559                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9560                                 error(
9561                         "bad extent[%llu, %llu) type mismatch with chunk",
9562                                         extent_key.objectid,
9563                                         extent_key.objectid + extent_key.offset);
9564                                 err |= CHUNK_TYPE_MISMATCH;
9565                         }
9566                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9567                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9568                                     BTRFS_BLOCK_GROUP_METADATA))) {
9569                                 error(
9570                         "bad extent[%llu, %llu) type mismatch with chunk",
9571                                         extent_key.objectid,
9572                                         extent_key.objectid + nodesize);
9573                                 err |= CHUNK_TYPE_MISMATCH;
9574                         }
9575                 }
9576 next:
9577                 ret = btrfs_next_item(extent_root, &path);
9578                 if (ret)
9579                         break;
9580         }
9581
9582 out:
9583         btrfs_release_path(&path);
9584
9585         if (total != used) {
9586                 error(
9587                 "block group[%llu %llu] used %llu but extent items used %llu",
9588                         bg_key.objectid, bg_key.offset, used, total);
9589                 err |= ACCOUNTING_MISMATCH;
9590         }
9591         return err;
9592 }
9593
9594 /*
9595  * Check a chunk item.
9596  * Including checking all referred dev_extents and block group
9597  */
9598 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9599                             struct extent_buffer *eb, int slot)
9600 {
9601         struct btrfs_root *extent_root = fs_info->extent_root;
9602         struct btrfs_root *dev_root = fs_info->dev_root;
9603         struct btrfs_path path;
9604         struct btrfs_key chunk_key;
9605         struct btrfs_key bg_key;
9606         struct btrfs_key devext_key;
9607         struct btrfs_chunk *chunk;
9608         struct extent_buffer *leaf;
9609         struct btrfs_block_group_item *bi;
9610         struct btrfs_block_group_item bg_item;
9611         struct btrfs_dev_extent *ptr;
9612         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9613         u64 length;
9614         u64 chunk_end;
9615         u64 type;
9616         u64 profile;
9617         int num_stripes;
9618         u64 offset;
9619         u64 objectid;
9620         int i;
9621         int ret;
9622         int err = 0;
9623
9624         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9625         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9626         length = btrfs_chunk_length(eb, chunk);
9627         chunk_end = chunk_key.offset + length;
9628         if (!IS_ALIGNED(length, sectorsize)) {
9629                 error("chunk[%llu %llu) not aligned to %u",
9630                         chunk_key.offset, chunk_end, sectorsize);
9631                 err |= BYTES_UNALIGNED;
9632                 goto out;
9633         }
9634
9635         type = btrfs_chunk_type(eb, chunk);
9636         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9637         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9638                 error("chunk[%llu %llu) has no chunk type",
9639                         chunk_key.offset, chunk_end);
9640                 err |= UNKNOWN_TYPE;
9641         }
9642         if (profile && (profile & (profile - 1))) {
9643                 error("chunk[%llu %llu) multiple profiles detected: %llx",
9644                         chunk_key.offset, chunk_end, profile);
9645                 err |= UNKNOWN_TYPE;
9646         }
9647
9648         bg_key.objectid = chunk_key.offset;
9649         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9650         bg_key.offset = length;
9651
9652         btrfs_init_path(&path);
9653         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9654         if (ret) {
9655                 error(
9656                 "chunk[%llu %llu) did not find the related block group item",
9657                         chunk_key.offset, chunk_end);
9658                 err |= REFERENCER_MISSING;
9659         } else{
9660                 leaf = path.nodes[0];
9661                 bi = btrfs_item_ptr(leaf, path.slots[0],
9662                                     struct btrfs_block_group_item);
9663                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9664                                    sizeof(bg_item));
9665                 if (btrfs_block_group_flags(&bg_item) != type) {
9666                         error(
9667 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9668                                 chunk_key.offset, chunk_end, type,
9669                                 btrfs_block_group_flags(&bg_item));
9670                         err |= REFERENCER_MISSING;
9671                 }
9672         }
9673
9674         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9675         for (i = 0; i < num_stripes; i++) {
9676                 btrfs_release_path(&path);
9677                 btrfs_init_path(&path);
9678                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9679                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9680                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9681
9682                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9683                                         0, 0);
9684                 if (ret)
9685                         goto not_match_dev;
9686
9687                 leaf = path.nodes[0];
9688                 ptr = btrfs_item_ptr(leaf, path.slots[0],
9689                                      struct btrfs_dev_extent);
9690                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9691                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9692                 if (objectid != chunk_key.objectid ||
9693                     offset != chunk_key.offset ||
9694                     btrfs_dev_extent_length(leaf, ptr) != length)
9695                         goto not_match_dev;
9696                 continue;
9697 not_match_dev:
9698                 err |= BACKREF_MISSING;
9699                 error(
9700                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9701                         chunk_key.objectid, chunk_end, i);
9702                 continue;
9703         }
9704         btrfs_release_path(&path);
9705 out:
9706         return err;
9707 }
9708
9709 /*
9710  * Main entry function to check known items and update related accounting info
9711  */
9712 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9713 {
9714         struct btrfs_fs_info *fs_info = root->fs_info;
9715         struct btrfs_key key;
9716         int slot = 0;
9717         int type;
9718         struct btrfs_extent_data_ref *dref;
9719         int ret;
9720         int err = 0;
9721
9722 next:
9723         btrfs_item_key_to_cpu(eb, &key, slot);
9724         type = btrfs_key_type(&key);
9725
9726         switch (type) {
9727         case BTRFS_EXTENT_DATA_KEY:
9728                 ret = check_extent_data_item(root, eb, slot);
9729                 err |= ret;
9730                 break;
9731         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9732                 ret = check_block_group_item(fs_info, eb, slot);
9733                 err |= ret;
9734                 break;
9735         case BTRFS_DEV_ITEM_KEY:
9736                 ret = check_dev_item(fs_info, eb, slot);
9737                 err |= ret;
9738                 break;
9739         case BTRFS_CHUNK_ITEM_KEY:
9740                 ret = check_chunk_item(fs_info, eb, slot);
9741                 err |= ret;
9742                 break;
9743         case BTRFS_DEV_EXTENT_KEY:
9744                 ret = check_dev_extent_item(fs_info, eb, slot);
9745                 err |= ret;
9746                 break;
9747         case BTRFS_EXTENT_ITEM_KEY:
9748         case BTRFS_METADATA_ITEM_KEY:
9749                 ret = check_extent_item(fs_info, eb, slot);
9750                 err |= ret;
9751                 break;
9752         case BTRFS_EXTENT_CSUM_KEY:
9753                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9754                 break;
9755         case BTRFS_TREE_BLOCK_REF_KEY:
9756                 ret = check_tree_block_backref(fs_info, key.offset,
9757                                                key.objectid, -1);
9758                 err |= ret;
9759                 break;
9760         case BTRFS_EXTENT_DATA_REF_KEY:
9761                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9762                 ret = check_extent_data_backref(fs_info,
9763                                 btrfs_extent_data_ref_root(eb, dref),
9764                                 btrfs_extent_data_ref_objectid(eb, dref),
9765                                 btrfs_extent_data_ref_offset(eb, dref),
9766                                 key.objectid, 0,
9767                                 btrfs_extent_data_ref_count(eb, dref));
9768                 err |= ret;
9769                 break;
9770         case BTRFS_SHARED_BLOCK_REF_KEY:
9771                 ret = check_shared_block_backref(fs_info, key.offset,
9772                                                  key.objectid, -1);
9773                 err |= ret;
9774                 break;
9775         case BTRFS_SHARED_DATA_REF_KEY:
9776                 ret = check_shared_data_backref(fs_info, key.offset,
9777                                                 key.objectid);
9778                 err |= ret;
9779                 break;
9780         default:
9781                 break;
9782         }
9783
9784         if (++slot < btrfs_header_nritems(eb))
9785                 goto next;
9786
9787         return err;
9788 }
9789
9790 /*
9791  * Helper function for later fs/subvol tree check.  To determine if a tree
9792  * block should be checked.
9793  * This function will ensure only the direct referencer with lowest rootid to
9794  * check a fs/subvolume tree block.
9795  *
9796  * Backref check at extent tree would detect errors like missing subvolume
9797  * tree, so we can do aggressive check to reduce duplicated checks.
9798  */
9799 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9800 {
9801         struct btrfs_root *extent_root = root->fs_info->extent_root;
9802         struct btrfs_key key;
9803         struct btrfs_path path;
9804         struct extent_buffer *leaf;
9805         int slot;
9806         struct btrfs_extent_item *ei;
9807         unsigned long ptr;
9808         unsigned long end;
9809         int type;
9810         u32 item_size;
9811         u64 offset;
9812         struct btrfs_extent_inline_ref *iref;
9813         int ret;
9814
9815         btrfs_init_path(&path);
9816         key.objectid = btrfs_header_bytenr(eb);
9817         key.type = BTRFS_METADATA_ITEM_KEY;
9818         key.offset = (u64)-1;
9819
9820         /*
9821          * Any failure in backref resolving means we can't determine
9822          * whom the tree block belongs to.
9823          * So in that case, we need to check that tree block
9824          */
9825         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9826         if (ret < 0)
9827                 goto need_check;
9828
9829         ret = btrfs_previous_extent_item(extent_root, &path,
9830                                          btrfs_header_bytenr(eb));
9831         if (ret)
9832                 goto need_check;
9833
9834         leaf = path.nodes[0];
9835         slot = path.slots[0];
9836         btrfs_item_key_to_cpu(leaf, &key, slot);
9837         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9838
9839         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9840                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9841         } else {
9842                 struct btrfs_tree_block_info *info;
9843
9844                 info = (struct btrfs_tree_block_info *)(ei + 1);
9845                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9846         }
9847
9848         item_size = btrfs_item_size_nr(leaf, slot);
9849         ptr = (unsigned long)iref;
9850         end = (unsigned long)ei + item_size;
9851         while (ptr < end) {
9852                 iref = (struct btrfs_extent_inline_ref *)ptr;
9853                 type = btrfs_extent_inline_ref_type(leaf, iref);
9854                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9855
9856                 /*
9857                  * We only check the tree block if current root is
9858                  * the lowest referencer of it.
9859                  */
9860                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9861                     offset < root->objectid) {
9862                         btrfs_release_path(&path);
9863                         return 0;
9864                 }
9865
9866                 ptr += btrfs_extent_inline_ref_size(type);
9867         }
9868         /*
9869          * Normally we should also check keyed tree block ref, but that may be
9870          * very time consuming.  Inlined ref should already make us skip a lot
9871          * of refs now.  So skip search keyed tree block ref.
9872          */
9873
9874 need_check:
9875         btrfs_release_path(&path);
9876         return 1;
9877 }
9878
9879 /*
9880  * Traversal function for tree block. We will do:
9881  * 1) Skip shared fs/subvolume tree blocks
9882  * 2) Update related bytes accounting
9883  * 3) Pre-order traversal
9884  */
9885 static int traverse_tree_block(struct btrfs_root *root,
9886                                 struct extent_buffer *node)
9887 {
9888         struct extent_buffer *eb;
9889         int level;
9890         u64 nr;
9891         int i;
9892         int err = 0;
9893         int ret;
9894
9895         /*
9896          * Skip shared fs/subvolume tree block, in that case they will
9897          * be checked by referencer with lowest rootid
9898          */
9899         if (is_fstree(root->objectid) && !should_check(root, node))
9900                 return 0;
9901
9902         /* Update bytes accounting */
9903         total_btree_bytes += node->len;
9904         if (fs_root_objectid(btrfs_header_owner(node)))
9905                 total_fs_tree_bytes += node->len;
9906         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9907                 total_extent_tree_bytes += node->len;
9908         if (!found_old_backref &&
9909             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9910             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9911             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9912                 found_old_backref = 1;
9913
9914         /* pre-order tranversal, check itself first */
9915         level = btrfs_header_level(node);
9916         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9917                                    btrfs_header_level(node),
9918                                    btrfs_header_owner(node));
9919         err |= ret;
9920         if (err)
9921                 error(
9922         "check %s failed root %llu bytenr %llu level %d, force continue check",
9923                         level ? "node":"leaf", root->objectid,
9924                         btrfs_header_bytenr(node), btrfs_header_level(node));
9925
9926         if (!level) {
9927                 btree_space_waste += btrfs_leaf_free_space(root, node);
9928                 ret = check_leaf_items(root, node);
9929                 err |= ret;
9930                 return err;
9931         }
9932
9933         nr = btrfs_header_nritems(node);
9934         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
9935                 sizeof(struct btrfs_key_ptr);
9936
9937         /* Then check all its children */
9938         for (i = 0; i < nr; i++) {
9939                 u64 blocknr = btrfs_node_blockptr(node, i);
9940
9941                 /*
9942                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
9943                  * to call the function itself.
9944                  */
9945                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
9946                 if (extent_buffer_uptodate(eb)) {
9947                         ret = traverse_tree_block(root, eb);
9948                         err |= ret;
9949                 }
9950                 free_extent_buffer(eb);
9951         }
9952
9953         return err;
9954 }
9955
9956 /*
9957  * Low memory usage version check_chunks_and_extents.
9958  */
9959 static int check_chunks_and_extents_v2(struct btrfs_root *root)
9960 {
9961         struct btrfs_path path;
9962         struct btrfs_key key;
9963         struct btrfs_root *root1;
9964         struct btrfs_root *cur_root;
9965         int err = 0;
9966         int ret;
9967
9968         root1 = root->fs_info->chunk_root;
9969         ret = traverse_tree_block(root1, root1->node);
9970         err |= ret;
9971
9972         root1 = root->fs_info->tree_root;
9973         ret = traverse_tree_block(root1, root1->node);
9974         err |= ret;
9975
9976         btrfs_init_path(&path);
9977         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
9978         key.offset = 0;
9979         key.type = BTRFS_ROOT_ITEM_KEY;
9980
9981         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
9982         if (ret) {
9983                 error("cannot find extent treet in tree_root");
9984                 goto out;
9985         }
9986
9987         while (1) {
9988                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9989                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9990                         goto next;
9991                 key.offset = (u64)-1;
9992
9993                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
9994                 if (IS_ERR(cur_root) || !cur_root) {
9995                         error("failed to read tree: %lld", key.objectid);
9996                         goto next;
9997                 }
9998
9999                 ret = traverse_tree_block(cur_root, cur_root->node);
10000                 err |= ret;
10001
10002 next:
10003                 ret = btrfs_next_item(root1, &path);
10004                 if (ret)
10005                         goto out;
10006         }
10007
10008 out:
10009         btrfs_release_path(&path);
10010         return err;
10011 }
10012
10013 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10014                            struct btrfs_root *root, int overwrite)
10015 {
10016         struct extent_buffer *c;
10017         struct extent_buffer *old = root->node;
10018         int level;
10019         int ret;
10020         struct btrfs_disk_key disk_key = {0,0,0};
10021
10022         level = 0;
10023
10024         if (overwrite) {
10025                 c = old;
10026                 extent_buffer_get(c);
10027                 goto init;
10028         }
10029         c = btrfs_alloc_free_block(trans, root,
10030                                    root->nodesize,
10031                                    root->root_key.objectid,
10032                                    &disk_key, level, 0, 0);
10033         if (IS_ERR(c)) {
10034                 c = old;
10035                 extent_buffer_get(c);
10036                 overwrite = 1;
10037         }
10038 init:
10039         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10040         btrfs_set_header_level(c, level);
10041         btrfs_set_header_bytenr(c, c->start);
10042         btrfs_set_header_generation(c, trans->transid);
10043         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10044         btrfs_set_header_owner(c, root->root_key.objectid);
10045
10046         write_extent_buffer(c, root->fs_info->fsid,
10047                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
10048
10049         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10050                             btrfs_header_chunk_tree_uuid(c),
10051                             BTRFS_UUID_SIZE);
10052
10053         btrfs_mark_buffer_dirty(c);
10054         /*
10055          * this case can happen in the following case:
10056          *
10057          * 1.overwrite previous root.
10058          *
10059          * 2.reinit reloc data root, this is because we skip pin
10060          * down reloc data tree before which means we can allocate
10061          * same block bytenr here.
10062          */
10063         if (old->start == c->start) {
10064                 btrfs_set_root_generation(&root->root_item,
10065                                           trans->transid);
10066                 root->root_item.level = btrfs_header_level(root->node);
10067                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10068                                         &root->root_key, &root->root_item);
10069                 if (ret) {
10070                         free_extent_buffer(c);
10071                         return ret;
10072                 }
10073         }
10074         free_extent_buffer(old);
10075         root->node = c;
10076         add_root_to_dirty_list(root);
10077         return 0;
10078 }
10079
10080 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10081                                 struct extent_buffer *eb, int tree_root)
10082 {
10083         struct extent_buffer *tmp;
10084         struct btrfs_root_item *ri;
10085         struct btrfs_key key;
10086         u64 bytenr;
10087         u32 nodesize;
10088         int level = btrfs_header_level(eb);
10089         int nritems;
10090         int ret;
10091         int i;
10092
10093         /*
10094          * If we have pinned this block before, don't pin it again.
10095          * This can not only avoid forever loop with broken filesystem
10096          * but also give us some speedups.
10097          */
10098         if (test_range_bit(&fs_info->pinned_extents, eb->start,
10099                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10100                 return 0;
10101
10102         btrfs_pin_extent(fs_info, eb->start, eb->len);
10103
10104         nodesize = btrfs_super_nodesize(fs_info->super_copy);
10105         nritems = btrfs_header_nritems(eb);
10106         for (i = 0; i < nritems; i++) {
10107                 if (level == 0) {
10108                         btrfs_item_key_to_cpu(eb, &key, i);
10109                         if (key.type != BTRFS_ROOT_ITEM_KEY)
10110                                 continue;
10111                         /* Skip the extent root and reloc roots */
10112                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10113                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10114                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10115                                 continue;
10116                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10117                         bytenr = btrfs_disk_root_bytenr(eb, ri);
10118
10119                         /*
10120                          * If at any point we start needing the real root we
10121                          * will have to build a stump root for the root we are
10122                          * in, but for now this doesn't actually use the root so
10123                          * just pass in extent_root.
10124                          */
10125                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10126                                               nodesize, 0);
10127                         if (!extent_buffer_uptodate(tmp)) {
10128                                 fprintf(stderr, "Error reading root block\n");
10129                                 return -EIO;
10130                         }
10131                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
10132                         free_extent_buffer(tmp);
10133                         if (ret)
10134                                 return ret;
10135                 } else {
10136                         bytenr = btrfs_node_blockptr(eb, i);
10137
10138                         /* If we aren't the tree root don't read the block */
10139                         if (level == 1 && !tree_root) {
10140                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
10141                                 continue;
10142                         }
10143
10144                         tmp = read_tree_block(fs_info->extent_root, bytenr,
10145                                               nodesize, 0);
10146                         if (!extent_buffer_uptodate(tmp)) {
10147                                 fprintf(stderr, "Error reading tree block\n");
10148                                 return -EIO;
10149                         }
10150                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10151                         free_extent_buffer(tmp);
10152                         if (ret)
10153                                 return ret;
10154                 }
10155         }
10156
10157         return 0;
10158 }
10159
10160 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10161 {
10162         int ret;
10163
10164         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10165         if (ret)
10166                 return ret;
10167
10168         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10169 }
10170
10171 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10172 {
10173         struct btrfs_block_group_cache *cache;
10174         struct btrfs_path *path;
10175         struct extent_buffer *leaf;
10176         struct btrfs_chunk *chunk;
10177         struct btrfs_key key;
10178         int ret;
10179         u64 start;
10180
10181         path = btrfs_alloc_path();
10182         if (!path)
10183                 return -ENOMEM;
10184
10185         key.objectid = 0;
10186         key.type = BTRFS_CHUNK_ITEM_KEY;
10187         key.offset = 0;
10188
10189         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10190         if (ret < 0) {
10191                 btrfs_free_path(path);
10192                 return ret;
10193         }
10194
10195         /*
10196          * We do this in case the block groups were screwed up and had alloc
10197          * bits that aren't actually set on the chunks.  This happens with
10198          * restored images every time and could happen in real life I guess.
10199          */
10200         fs_info->avail_data_alloc_bits = 0;
10201         fs_info->avail_metadata_alloc_bits = 0;
10202         fs_info->avail_system_alloc_bits = 0;
10203
10204         /* First we need to create the in-memory block groups */
10205         while (1) {
10206                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10207                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
10208                         if (ret < 0) {
10209                                 btrfs_free_path(path);
10210                                 return ret;
10211                         }
10212                         if (ret) {
10213                                 ret = 0;
10214                                 break;
10215                         }
10216                 }
10217                 leaf = path->nodes[0];
10218                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10219                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10220                         path->slots[0]++;
10221                         continue;
10222                 }
10223
10224                 chunk = btrfs_item_ptr(leaf, path->slots[0],
10225                                        struct btrfs_chunk);
10226                 btrfs_add_block_group(fs_info, 0,
10227                                       btrfs_chunk_type(leaf, chunk),
10228                                       key.objectid, key.offset,
10229                                       btrfs_chunk_length(leaf, chunk));
10230                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10231                                  key.offset + btrfs_chunk_length(leaf, chunk),
10232                                  GFP_NOFS);
10233                 path->slots[0]++;
10234         }
10235         start = 0;
10236         while (1) {
10237                 cache = btrfs_lookup_first_block_group(fs_info, start);
10238                 if (!cache)
10239                         break;
10240                 cache->cached = 1;
10241                 start = cache->key.objectid + cache->key.offset;
10242         }
10243
10244         btrfs_free_path(path);
10245         return 0;
10246 }
10247
10248 static int reset_balance(struct btrfs_trans_handle *trans,
10249                          struct btrfs_fs_info *fs_info)
10250 {
10251         struct btrfs_root *root = fs_info->tree_root;
10252         struct btrfs_path *path;
10253         struct extent_buffer *leaf;
10254         struct btrfs_key key;
10255         int del_slot, del_nr = 0;
10256         int ret;
10257         int found = 0;
10258
10259         path = btrfs_alloc_path();
10260         if (!path)
10261                 return -ENOMEM;
10262
10263         key.objectid = BTRFS_BALANCE_OBJECTID;
10264         key.type = BTRFS_BALANCE_ITEM_KEY;
10265         key.offset = 0;
10266
10267         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10268         if (ret) {
10269                 if (ret > 0)
10270                         ret = 0;
10271                 if (!ret)
10272                         goto reinit_data_reloc;
10273                 else
10274                         goto out;
10275         }
10276
10277         ret = btrfs_del_item(trans, root, path);
10278         if (ret)
10279                 goto out;
10280         btrfs_release_path(path);
10281
10282         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10283         key.type = BTRFS_ROOT_ITEM_KEY;
10284         key.offset = 0;
10285
10286         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10287         if (ret < 0)
10288                 goto out;
10289         while (1) {
10290                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10291                         if (!found)
10292                                 break;
10293
10294                         if (del_nr) {
10295                                 ret = btrfs_del_items(trans, root, path,
10296                                                       del_slot, del_nr);
10297                                 del_nr = 0;
10298                                 if (ret)
10299                                         goto out;
10300                         }
10301                         key.offset++;
10302                         btrfs_release_path(path);
10303
10304                         found = 0;
10305                         ret = btrfs_search_slot(trans, root, &key, path,
10306                                                 -1, 1);
10307                         if (ret < 0)
10308                                 goto out;
10309                         continue;
10310                 }
10311                 found = 1;
10312                 leaf = path->nodes[0];
10313                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10314                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10315                         break;
10316                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10317                         path->slots[0]++;
10318                         continue;
10319                 }
10320                 if (!del_nr) {
10321                         del_slot = path->slots[0];
10322                         del_nr = 1;
10323                 } else {
10324                         del_nr++;
10325                 }
10326                 path->slots[0]++;
10327         }
10328
10329         if (del_nr) {
10330                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10331                 if (ret)
10332                         goto out;
10333         }
10334         btrfs_release_path(path);
10335
10336 reinit_data_reloc:
10337         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10338         key.type = BTRFS_ROOT_ITEM_KEY;
10339         key.offset = (u64)-1;
10340         root = btrfs_read_fs_root(fs_info, &key);
10341         if (IS_ERR(root)) {
10342                 fprintf(stderr, "Error reading data reloc tree\n");
10343                 ret = PTR_ERR(root);
10344                 goto out;
10345         }
10346         record_root_in_trans(trans, root);
10347         ret = btrfs_fsck_reinit_root(trans, root, 0);
10348         if (ret)
10349                 goto out;
10350         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10351 out:
10352         btrfs_free_path(path);
10353         return ret;
10354 }
10355
10356 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10357                               struct btrfs_fs_info *fs_info)
10358 {
10359         u64 start = 0;
10360         int ret;
10361
10362         /*
10363          * The only reason we don't do this is because right now we're just
10364          * walking the trees we find and pinning down their bytes, we don't look
10365          * at any of the leaves.  In order to do mixed groups we'd have to check
10366          * the leaves of any fs roots and pin down the bytes for any file
10367          * extents we find.  Not hard but why do it if we don't have to?
10368          */
10369         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10370                 fprintf(stderr, "We don't support re-initing the extent tree "
10371                         "for mixed block groups yet, please notify a btrfs "
10372                         "developer you want to do this so they can add this "
10373                         "functionality.\n");
10374                 return -EINVAL;
10375         }
10376
10377         /*
10378          * first we need to walk all of the trees except the extent tree and pin
10379          * down the bytes that are in use so we don't overwrite any existing
10380          * metadata.
10381          */
10382         ret = pin_metadata_blocks(fs_info);
10383         if (ret) {
10384                 fprintf(stderr, "error pinning down used bytes\n");
10385                 return ret;
10386         }
10387
10388         /*
10389          * Need to drop all the block groups since we're going to recreate all
10390          * of them again.
10391          */
10392         btrfs_free_block_groups(fs_info);
10393         ret = reset_block_groups(fs_info);
10394         if (ret) {
10395                 fprintf(stderr, "error resetting the block groups\n");
10396                 return ret;
10397         }
10398
10399         /* Ok we can allocate now, reinit the extent root */
10400         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10401         if (ret) {
10402                 fprintf(stderr, "extent root initialization failed\n");
10403                 /*
10404                  * When the transaction code is updated we should end the
10405                  * transaction, but for now progs only knows about commit so
10406                  * just return an error.
10407                  */
10408                 return ret;
10409         }
10410
10411         /*
10412          * Now we have all the in-memory block groups setup so we can make
10413          * allocations properly, and the metadata we care about is safe since we
10414          * pinned all of it above.
10415          */
10416         while (1) {
10417                 struct btrfs_block_group_cache *cache;
10418
10419                 cache = btrfs_lookup_first_block_group(fs_info, start);
10420                 if (!cache)
10421                         break;
10422                 start = cache->key.objectid + cache->key.offset;
10423                 ret = btrfs_insert_item(trans, fs_info->extent_root,
10424                                         &cache->key, &cache->item,
10425                                         sizeof(cache->item));
10426                 if (ret) {
10427                         fprintf(stderr, "Error adding block group\n");
10428                         return ret;
10429                 }
10430                 btrfs_extent_post_op(trans, fs_info->extent_root);
10431         }
10432
10433         ret = reset_balance(trans, fs_info);
10434         if (ret)
10435                 fprintf(stderr, "error resetting the pending balance\n");
10436
10437         return ret;
10438 }
10439
10440 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10441 {
10442         struct btrfs_path *path;
10443         struct btrfs_trans_handle *trans;
10444         struct btrfs_key key;
10445         int ret;
10446
10447         printf("Recowing metadata block %llu\n", eb->start);
10448         key.objectid = btrfs_header_owner(eb);
10449         key.type = BTRFS_ROOT_ITEM_KEY;
10450         key.offset = (u64)-1;
10451
10452         root = btrfs_read_fs_root(root->fs_info, &key);
10453         if (IS_ERR(root)) {
10454                 fprintf(stderr, "Couldn't find owner root %llu\n",
10455                         key.objectid);
10456                 return PTR_ERR(root);
10457         }
10458
10459         path = btrfs_alloc_path();
10460         if (!path)
10461                 return -ENOMEM;
10462
10463         trans = btrfs_start_transaction(root, 1);
10464         if (IS_ERR(trans)) {
10465                 btrfs_free_path(path);
10466                 return PTR_ERR(trans);
10467         }
10468
10469         path->lowest_level = btrfs_header_level(eb);
10470         if (path->lowest_level)
10471                 btrfs_node_key_to_cpu(eb, &key, 0);
10472         else
10473                 btrfs_item_key_to_cpu(eb, &key, 0);
10474
10475         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10476         btrfs_commit_transaction(trans, root);
10477         btrfs_free_path(path);
10478         return ret;
10479 }
10480
10481 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10482 {
10483         struct btrfs_path *path;
10484         struct btrfs_trans_handle *trans;
10485         struct btrfs_key key;
10486         int ret;
10487
10488         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10489                bad->key.type, bad->key.offset);
10490         key.objectid = bad->root_id;
10491         key.type = BTRFS_ROOT_ITEM_KEY;
10492         key.offset = (u64)-1;
10493
10494         root = btrfs_read_fs_root(root->fs_info, &key);
10495         if (IS_ERR(root)) {
10496                 fprintf(stderr, "Couldn't find owner root %llu\n",
10497                         key.objectid);
10498                 return PTR_ERR(root);
10499         }
10500
10501         path = btrfs_alloc_path();
10502         if (!path)
10503                 return -ENOMEM;
10504
10505         trans = btrfs_start_transaction(root, 1);
10506         if (IS_ERR(trans)) {
10507                 btrfs_free_path(path);
10508                 return PTR_ERR(trans);
10509         }
10510
10511         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10512         if (ret) {
10513                 if (ret > 0)
10514                         ret = 0;
10515                 goto out;
10516         }
10517         ret = btrfs_del_item(trans, root, path);
10518 out:
10519         btrfs_commit_transaction(trans, root);
10520         btrfs_free_path(path);
10521         return ret;
10522 }
10523
10524 static int zero_log_tree(struct btrfs_root *root)
10525 {
10526         struct btrfs_trans_handle *trans;
10527         int ret;
10528
10529         trans = btrfs_start_transaction(root, 1);
10530         if (IS_ERR(trans)) {
10531                 ret = PTR_ERR(trans);
10532                 return ret;
10533         }
10534         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10535         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10536         ret = btrfs_commit_transaction(trans, root);
10537         return ret;
10538 }
10539
10540 static int populate_csum(struct btrfs_trans_handle *trans,
10541                          struct btrfs_root *csum_root, char *buf, u64 start,
10542                          u64 len)
10543 {
10544         u64 offset = 0;
10545         u64 sectorsize;
10546         int ret = 0;
10547
10548         while (offset < len) {
10549                 sectorsize = csum_root->sectorsize;
10550                 ret = read_extent_data(csum_root, buf, start + offset,
10551                                        &sectorsize, 0);
10552                 if (ret)
10553                         break;
10554                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10555                                             start + offset, buf, sectorsize);
10556                 if (ret)
10557                         break;
10558                 offset += sectorsize;
10559         }
10560         return ret;
10561 }
10562
10563 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10564                                       struct btrfs_root *csum_root,
10565                                       struct btrfs_root *cur_root)
10566 {
10567         struct btrfs_path *path;
10568         struct btrfs_key key;
10569         struct extent_buffer *node;
10570         struct btrfs_file_extent_item *fi;
10571         char *buf = NULL;
10572         u64 start = 0;
10573         u64 len = 0;
10574         int slot = 0;
10575         int ret = 0;
10576
10577         path = btrfs_alloc_path();
10578         if (!path)
10579                 return -ENOMEM;
10580         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10581         if (!buf) {
10582                 ret = -ENOMEM;
10583                 goto out;
10584         }
10585
10586         key.objectid = 0;
10587         key.offset = 0;
10588         key.type = 0;
10589
10590         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10591         if (ret < 0)
10592                 goto out;
10593         /* Iterate all regular file extents and fill its csum */
10594         while (1) {
10595                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10596
10597                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10598                         goto next;
10599                 node = path->nodes[0];
10600                 slot = path->slots[0];
10601                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10602                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10603                         goto next;
10604                 start = btrfs_file_extent_disk_bytenr(node, fi);
10605                 len = btrfs_file_extent_disk_num_bytes(node, fi);
10606
10607                 ret = populate_csum(trans, csum_root, buf, start, len);
10608                 if (ret == -EEXIST)
10609                         ret = 0;
10610                 if (ret < 0)
10611                         goto out;
10612 next:
10613                 /*
10614                  * TODO: if next leaf is corrupted, jump to nearest next valid
10615                  * leaf.
10616                  */
10617                 ret = btrfs_next_item(cur_root, path);
10618                 if (ret < 0)
10619                         goto out;
10620                 if (ret > 0) {
10621                         ret = 0;
10622                         goto out;
10623                 }
10624         }
10625
10626 out:
10627         btrfs_free_path(path);
10628         free(buf);
10629         return ret;
10630 }
10631
10632 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10633                                   struct btrfs_root *csum_root)
10634 {
10635         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10636         struct btrfs_path *path;
10637         struct btrfs_root *tree_root = fs_info->tree_root;
10638         struct btrfs_root *cur_root;
10639         struct extent_buffer *node;
10640         struct btrfs_key key;
10641         int slot = 0;
10642         int ret = 0;
10643
10644         path = btrfs_alloc_path();
10645         if (!path)
10646                 return -ENOMEM;
10647
10648         key.objectid = BTRFS_FS_TREE_OBJECTID;
10649         key.offset = 0;
10650         key.type = BTRFS_ROOT_ITEM_KEY;
10651
10652         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10653         if (ret < 0)
10654                 goto out;
10655         if (ret > 0) {
10656                 ret = -ENOENT;
10657                 goto out;
10658         }
10659
10660         while (1) {
10661                 node = path->nodes[0];
10662                 slot = path->slots[0];
10663                 btrfs_item_key_to_cpu(node, &key, slot);
10664                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10665                         goto out;
10666                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10667                         goto next;
10668                 if (!is_fstree(key.objectid))
10669                         goto next;
10670                 key.offset = (u64)-1;
10671
10672                 cur_root = btrfs_read_fs_root(fs_info, &key);
10673                 if (IS_ERR(cur_root) || !cur_root) {
10674                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10675                                 key.objectid);
10676                         goto out;
10677                 }
10678                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10679                                 cur_root);
10680                 if (ret < 0)
10681                         goto out;
10682 next:
10683                 ret = btrfs_next_item(tree_root, path);
10684                 if (ret > 0) {
10685                         ret = 0;
10686                         goto out;
10687                 }
10688                 if (ret < 0)
10689                         goto out;
10690         }
10691
10692 out:
10693         btrfs_free_path(path);
10694         return ret;
10695 }
10696
10697 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10698                                       struct btrfs_root *csum_root)
10699 {
10700         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10701         struct btrfs_path *path;
10702         struct btrfs_extent_item *ei;
10703         struct extent_buffer *leaf;
10704         char *buf;
10705         struct btrfs_key key;
10706         int ret;
10707
10708         path = btrfs_alloc_path();
10709         if (!path)
10710                 return -ENOMEM;
10711
10712         key.objectid = 0;
10713         key.type = BTRFS_EXTENT_ITEM_KEY;
10714         key.offset = 0;
10715
10716         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10717         if (ret < 0) {
10718                 btrfs_free_path(path);
10719                 return ret;
10720         }
10721
10722         buf = malloc(csum_root->sectorsize);
10723         if (!buf) {
10724                 btrfs_free_path(path);
10725                 return -ENOMEM;
10726         }
10727
10728         while (1) {
10729                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10730                         ret = btrfs_next_leaf(extent_root, path);
10731                         if (ret < 0)
10732                                 break;
10733                         if (ret) {
10734                                 ret = 0;
10735                                 break;
10736                         }
10737                 }
10738                 leaf = path->nodes[0];
10739
10740                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10741                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10742                         path->slots[0]++;
10743                         continue;
10744                 }
10745
10746                 ei = btrfs_item_ptr(leaf, path->slots[0],
10747                                     struct btrfs_extent_item);
10748                 if (!(btrfs_extent_flags(leaf, ei) &
10749                       BTRFS_EXTENT_FLAG_DATA)) {
10750                         path->slots[0]++;
10751                         continue;
10752                 }
10753
10754                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10755                                     key.offset);
10756                 if (ret)
10757                         break;
10758                 path->slots[0]++;
10759         }
10760
10761         btrfs_free_path(path);
10762         free(buf);
10763         return ret;
10764 }
10765
/*
 * Recalculate the data checksums and insert them into the csum tree.
 *
 * Initializing the extent tree wipes out all extent info, so in that case
 * the extent tree cannot be used as the source. When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is used.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
10782
10783 static void free_roots_info_cache(void)
10784 {
10785         if (!roots_info_cache)
10786                 return;
10787
10788         while (!cache_tree_empty(roots_info_cache)) {
10789                 struct cache_extent *entry;
10790                 struct root_item_info *rii;
10791
10792                 entry = first_cache_extent(roots_info_cache);
10793                 if (!entry)
10794                         break;
10795                 remove_cache_extent(roots_info_cache, entry);
10796                 rii = container_of(entry, struct root_item_info, cache_extent);
10797                 free(rii);
10798         }
10799
10800         free(roots_info_cache);
10801         roots_info_cache = NULL;
10802 }
10803
10804 static int build_roots_info_cache(struct btrfs_fs_info *info)
10805 {
10806         int ret = 0;
10807         struct btrfs_key key;
10808         struct extent_buffer *leaf;
10809         struct btrfs_path *path;
10810
10811         if (!roots_info_cache) {
10812                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10813                 if (!roots_info_cache)
10814                         return -ENOMEM;
10815                 cache_tree_init(roots_info_cache);
10816         }
10817
10818         path = btrfs_alloc_path();
10819         if (!path)
10820                 return -ENOMEM;
10821
10822         key.objectid = 0;
10823         key.type = BTRFS_EXTENT_ITEM_KEY;
10824         key.offset = 0;
10825
10826         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10827         if (ret < 0)
10828                 goto out;
10829         leaf = path->nodes[0];
10830
10831         while (1) {
10832                 struct btrfs_key found_key;
10833                 struct btrfs_extent_item *ei;
10834                 struct btrfs_extent_inline_ref *iref;
10835                 int slot = path->slots[0];
10836                 int type;
10837                 u64 flags;
10838                 u64 root_id;
10839                 u8 level;
10840                 struct cache_extent *entry;
10841                 struct root_item_info *rii;
10842
10843                 if (slot >= btrfs_header_nritems(leaf)) {
10844                         ret = btrfs_next_leaf(info->extent_root, path);
10845                         if (ret < 0) {
10846                                 break;
10847                         } else if (ret) {
10848                                 ret = 0;
10849                                 break;
10850                         }
10851                         leaf = path->nodes[0];
10852                         slot = path->slots[0];
10853                 }
10854
10855                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10856
10857                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10858                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10859                         goto next;
10860
10861                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10862                 flags = btrfs_extent_flags(leaf, ei);
10863
10864                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10865                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10866                         goto next;
10867
10868                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10869                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10870                         level = found_key.offset;
10871                 } else {
10872                         struct btrfs_tree_block_info *binfo;
10873
10874                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10875                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10876                         level = btrfs_tree_block_level(leaf, binfo);
10877                 }
10878
10879                 /*
10880                  * For a root extent, it must be of the following type and the
10881                  * first (and only one) iref in the item.
10882                  */
10883                 type = btrfs_extent_inline_ref_type(leaf, iref);
10884                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10885                         goto next;
10886
10887                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10888                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10889                 if (!entry) {
10890                         rii = malloc(sizeof(struct root_item_info));
10891                         if (!rii) {
10892                                 ret = -ENOMEM;
10893                                 goto out;
10894                         }
10895                         rii->cache_extent.start = root_id;
10896                         rii->cache_extent.size = 1;
10897                         rii->level = (u8)-1;
10898                         entry = &rii->cache_extent;
10899                         ret = insert_cache_extent(roots_info_cache, entry);
10900                         ASSERT(ret == 0);
10901                 } else {
10902                         rii = container_of(entry, struct root_item_info,
10903                                            cache_extent);
10904                 }
10905
10906                 ASSERT(rii->cache_extent.start == root_id);
10907                 ASSERT(rii->cache_extent.size == 1);
10908
10909                 if (level > rii->level || rii->level == (u8)-1) {
10910                         rii->level = level;
10911                         rii->bytenr = found_key.objectid;
10912                         rii->gen = btrfs_extent_generation(leaf, ei);
10913                         rii->node_count = 1;
10914                 } else if (level == rii->level) {
10915                         rii->node_count++;
10916                 }
10917 next:
10918                 path->slots[0]++;
10919         }
10920
10921 out:
10922         btrfs_free_path(path);
10923
10924         return ret;
10925 }
10926
10927 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10928                                   struct btrfs_path *path,
10929                                   const struct btrfs_key *root_key,
10930                                   const int read_only_mode)
10931 {
10932         const u64 root_id = root_key->objectid;
10933         struct cache_extent *entry;
10934         struct root_item_info *rii;
10935         struct btrfs_root_item ri;
10936         unsigned long offset;
10937
10938         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10939         if (!entry) {
10940                 fprintf(stderr,
10941                         "Error: could not find extent items for root %llu\n",
10942                         root_key->objectid);
10943                 return -ENOENT;
10944         }
10945
10946         rii = container_of(entry, struct root_item_info, cache_extent);
10947         ASSERT(rii->cache_extent.start == root_id);
10948         ASSERT(rii->cache_extent.size == 1);
10949
10950         if (rii->node_count != 1) {
10951                 fprintf(stderr,
10952                         "Error: could not find btree root extent for root %llu\n",
10953                         root_id);
10954                 return -ENOENT;
10955         }
10956
10957         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10958         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10959
10960         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10961             btrfs_root_level(&ri) != rii->level ||
10962             btrfs_root_generation(&ri) != rii->gen) {
10963
10964                 /*
10965                  * If we're in repair mode but our caller told us to not update
10966                  * the root item, i.e. just check if it needs to be updated, don't
10967                  * print this message, since the caller will call us again shortly
10968                  * for the same root item without read only mode (the caller will
10969                  * open a transaction first).
10970                  */
10971                 if (!(read_only_mode && repair))
10972                         fprintf(stderr,
10973                                 "%sroot item for root %llu,"
10974                                 " current bytenr %llu, current gen %llu, current level %u,"
10975                                 " new bytenr %llu, new gen %llu, new level %u\n",
10976                                 (read_only_mode ? "" : "fixing "),
10977                                 root_id,
10978                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10979                                 btrfs_root_level(&ri),
10980                                 rii->bytenr, rii->gen, rii->level);
10981
10982                 if (btrfs_root_generation(&ri) > rii->gen) {
10983                         fprintf(stderr,
10984                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10985                                 root_id, btrfs_root_generation(&ri), rii->gen);
10986                         return -EINVAL;
10987                 }
10988
10989                 if (!read_only_mode) {
10990                         btrfs_set_root_bytenr(&ri, rii->bytenr);
10991                         btrfs_set_root_level(&ri, rii->level);
10992                         btrfs_set_root_generation(&ri, rii->gen);
10993                         write_extent_buffer(path->nodes[0], &ri,
10994                                             offset, sizeof(ri));
10995                 }
10996
10997                 return 1;
10998         }
10999
11000         return 0;
11001 }
11002
11003 /*
11004  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11005  * caused read-only snapshots to be corrupted if they were created at a moment
11006  * when the source subvolume/snapshot had orphan items. The issue was that the
11007  * on-disk root items became incorrect, referring to the pre orphan cleanup root
11008  * node instead of the post orphan cleanup root node.
11009  * So this function, and its callees, just detects and fixes those cases. Even
11010  * though the regression was for read-only snapshots, this function applies to
11011  * any snapshot/subvolume root.
11012  * This must be run before any other repair code - not doing it so, makes other
11013  * repair code delete or modify backrefs in the extent tree for example, which
11014  * will result in an inconsistent fs after repairing the root items.
11015  */
11016 static int repair_root_items(struct btrfs_fs_info *info)
11017 {
11018         struct btrfs_path *path = NULL;
11019         struct btrfs_key key;
11020         struct extent_buffer *leaf;
11021         struct btrfs_trans_handle *trans = NULL;
11022         int ret = 0;
11023         int bad_roots = 0;
11024         int need_trans = 0;
11025
11026         ret = build_roots_info_cache(info);
11027         if (ret)
11028                 goto out;
11029
11030         path = btrfs_alloc_path();
11031         if (!path) {
11032                 ret = -ENOMEM;
11033                 goto out;
11034         }
11035
11036         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11037         key.type = BTRFS_ROOT_ITEM_KEY;
11038         key.offset = 0;
11039
11040 again:
11041         /*
11042          * Avoid opening and committing transactions if a leaf doesn't have
11043          * any root items that need to be fixed, so that we avoid rotating
11044          * backup roots unnecessarily.
11045          */
11046         if (need_trans) {
11047                 trans = btrfs_start_transaction(info->tree_root, 1);
11048                 if (IS_ERR(trans)) {
11049                         ret = PTR_ERR(trans);
11050                         goto out;
11051                 }
11052         }
11053
11054         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11055                                 0, trans ? 1 : 0);
11056         if (ret < 0)
11057                 goto out;
11058         leaf = path->nodes[0];
11059
11060         while (1) {
11061                 struct btrfs_key found_key;
11062
11063                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11064                         int no_more_keys = find_next_key(path, &key);
11065
11066                         btrfs_release_path(path);
11067                         if (trans) {
11068                                 ret = btrfs_commit_transaction(trans,
11069                                                                info->tree_root);
11070                                 trans = NULL;
11071                                 if (ret < 0)
11072                                         goto out;
11073                         }
11074                         need_trans = 0;
11075                         if (no_more_keys)
11076                                 break;
11077                         goto again;
11078                 }
11079
11080                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11081
11082                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11083                         goto next;
11084                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11085                         goto next;
11086
11087                 ret = maybe_repair_root_item(info, path, &found_key,
11088                                              trans ? 0 : 1);
11089                 if (ret < 0)
11090                         goto out;
11091                 if (ret) {
11092                         if (!trans && repair) {
11093                                 need_trans = 1;
11094                                 key = found_key;
11095                                 btrfs_release_path(path);
11096                                 goto again;
11097                         }
11098                         bad_roots++;
11099                 }
11100 next:
11101                 path->slots[0]++;
11102         }
11103         ret = 0;
11104 out:
11105         free_roots_info_cache();
11106         btrfs_free_path(path);
11107         if (trans)
11108                 btrfs_commit_transaction(trans, info->tree_root);
11109         if (ret < 0)
11110                 return ret;
11111
11112         return bad_roots;
11113 }
11114
/*
 * Help text for "btrfs check", as a NULL-terminated array of lines.
 *
 * The empty string separates the descriptive lines from the option list.
 * Option descriptions all start in the same column, so keep the padding in
 * sync when adding entries.
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--low-memory                check in low memory usage mode(experimental)",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL
};
11139
11140 int cmd_check(int argc, char **argv)
11141 {
11142         struct cache_tree root_cache;
11143         struct btrfs_root *root;
11144         struct btrfs_fs_info *info;
11145         u64 bytenr = 0;
11146         u64 subvolid = 0;
11147         u64 tree_root_bytenr = 0;
11148         u64 chunk_root_bytenr = 0;
11149         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11150         int ret;
11151         u64 num;
11152         int init_csum_tree = 0;
11153         int readonly = 0;
11154         int qgroup_report = 0;
11155         int qgroups_repaired = 0;
11156         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
11157
11158         while(1) {
11159                 int c;
11160                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11161                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11162                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11163                         GETOPT_VAL_LOW_MEMORY };
11164                 static const struct option long_options[] = {
11165                         { "super", required_argument, NULL, 's' },
11166                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11167                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11168                         { "init-csum-tree", no_argument, NULL,
11169                                 GETOPT_VAL_INIT_CSUM },
11170                         { "init-extent-tree", no_argument, NULL,
11171                                 GETOPT_VAL_INIT_EXTENT },
11172                         { "check-data-csum", no_argument, NULL,
11173                                 GETOPT_VAL_CHECK_CSUM },
11174                         { "backup", no_argument, NULL, 'b' },
11175                         { "subvol-extents", required_argument, NULL, 'E' },
11176                         { "qgroup-report", no_argument, NULL, 'Q' },
11177                         { "tree-root", required_argument, NULL, 'r' },
11178                         { "chunk-root", required_argument, NULL,
11179                                 GETOPT_VAL_CHUNK_TREE },
11180                         { "progress", no_argument, NULL, 'p' },
11181                         { "low-memory", no_argument, NULL,
11182                                 GETOPT_VAL_LOW_MEMORY },
11183                         { NULL, 0, NULL, 0}
11184                 };
11185
11186                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11187                 if (c < 0)
11188                         break;
11189                 switch(c) {
11190                         case 'a': /* ignored */ break;
11191                         case 'b':
11192                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11193                                 break;
11194                         case 's':
11195                                 num = arg_strtou64(optarg);
11196                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11197                                         fprintf(stderr,
11198                                                 "ERROR: super mirror should be less than: %d\n",
11199                                                 BTRFS_SUPER_MIRROR_MAX);
11200                                         exit(1);
11201                                 }
11202                                 bytenr = btrfs_sb_offset(((int)num));
11203                                 printf("using SB copy %llu, bytenr %llu\n", num,
11204                                        (unsigned long long)bytenr);
11205                                 break;
11206                         case 'Q':
11207                                 qgroup_report = 1;
11208                                 break;
11209                         case 'E':
11210                                 subvolid = arg_strtou64(optarg);
11211                                 break;
11212                         case 'r':
11213                                 tree_root_bytenr = arg_strtou64(optarg);
11214                                 break;
11215                         case GETOPT_VAL_CHUNK_TREE:
11216                                 chunk_root_bytenr = arg_strtou64(optarg);
11217                                 break;
11218                         case 'p':
11219                                 ctx.progress_enabled = true;
11220                                 break;
11221                         case '?':
11222                         case 'h':
11223                                 usage(cmd_check_usage);
11224                         case GETOPT_VAL_REPAIR:
11225                                 printf("enabling repair mode\n");
11226                                 repair = 1;
11227                                 ctree_flags |= OPEN_CTREE_WRITES;
11228                                 break;
11229                         case GETOPT_VAL_READONLY:
11230                                 readonly = 1;
11231                                 break;
11232                         case GETOPT_VAL_INIT_CSUM:
11233                                 printf("Creating a new CRC tree\n");
11234                                 init_csum_tree = 1;
11235                                 repair = 1;
11236                                 ctree_flags |= OPEN_CTREE_WRITES;
11237                                 break;
11238                         case GETOPT_VAL_INIT_EXTENT:
11239                                 init_extent_tree = 1;
11240                                 ctree_flags |= (OPEN_CTREE_WRITES |
11241                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
11242                                 repair = 1;
11243                                 break;
11244                         case GETOPT_VAL_CHECK_CSUM:
11245                                 check_data_csum = 1;
11246                                 break;
11247                         case GETOPT_VAL_LOW_MEMORY:
11248                                 low_memory = 1;
11249                                 break;
11250                 }
11251         }
11252
11253         if (check_argc_exact(argc - optind, 1))
11254                 usage(cmd_check_usage);
11255
11256         if (ctx.progress_enabled) {
11257                 ctx.tp = TASK_NOTHING;
11258                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11259         }
11260
11261         /* This check is the only reason for --readonly to exist */
11262         if (readonly && repair) {
11263                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11264                 exit(1);
11265         }
11266
11267         /*
11268          * Not supported yet
11269          */
11270         if (repair && low_memory) {
11271                 error("Low memory mode doesn't support repair yet");
11272                 exit(1);
11273         }
11274
11275         radix_tree_init();
11276         cache_tree_init(&root_cache);
11277
11278         if((ret = check_mounted(argv[optind])) < 0) {
11279                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11280                 goto err_out;
11281         } else if(ret) {
11282                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11283                 ret = -EBUSY;
11284                 goto err_out;
11285         }
11286
11287         /* only allow partial opening under repair mode */
11288         if (repair)
11289                 ctree_flags |= OPEN_CTREE_PARTIAL;
11290
11291         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11292                                   chunk_root_bytenr, ctree_flags);
11293         if (!info) {
11294                 fprintf(stderr, "Couldn't open file system\n");
11295                 ret = -EIO;
11296                 goto err_out;
11297         }
11298
11299         global_info = info;
11300         root = info->fs_root;
11301
11302         /*
11303          * repair mode will force us to commit transaction which
11304          * will make us fail to load log tree when mounting.
11305          */
11306         if (repair && btrfs_super_log_root(info->super_copy)) {
11307                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11308                 if (!ret) {
11309                         ret = 1;
11310                         goto close_out;
11311                 }
11312                 ret = zero_log_tree(root);
11313                 if (ret) {
11314                         fprintf(stderr, "fail to zero log tree\n");
11315                         goto close_out;
11316                 }
11317         }
11318
11319         uuid_unparse(info->super_copy->fsid, uuidbuf);
11320         if (qgroup_report) {
11321                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11322                        uuidbuf);
11323                 ret = qgroup_verify_all(info);
11324                 if (ret == 0)
11325                         report_qgroups(1);
11326                 goto close_out;
11327         }
11328         if (subvolid) {
11329                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11330                        subvolid, argv[optind], uuidbuf);
11331                 ret = print_extent_state(info, subvolid);
11332                 goto close_out;
11333         }
11334         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11335
11336         if (!extent_buffer_uptodate(info->tree_root->node) ||
11337             !extent_buffer_uptodate(info->dev_root->node) ||
11338             !extent_buffer_uptodate(info->chunk_root->node)) {
11339                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11340                 ret = -EIO;
11341                 goto close_out;
11342         }
11343
11344         if (init_extent_tree || init_csum_tree) {
11345                 struct btrfs_trans_handle *trans;
11346
11347                 trans = btrfs_start_transaction(info->extent_root, 0);
11348                 if (IS_ERR(trans)) {
11349                         fprintf(stderr, "Error starting transaction\n");
11350                         ret = PTR_ERR(trans);
11351                         goto close_out;
11352                 }
11353
11354                 if (init_extent_tree) {
11355                         printf("Creating a new extent tree\n");
11356                         ret = reinit_extent_tree(trans, info);
11357                         if (ret)
11358                                 goto close_out;
11359                 }
11360
11361                 if (init_csum_tree) {
11362                         fprintf(stderr, "Reinit crc root\n");
11363                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11364                         if (ret) {
11365                                 fprintf(stderr, "crc root initialization failed\n");
11366                                 ret = -EIO;
11367                                 goto close_out;
11368                         }
11369
11370                         ret = fill_csum_tree(trans, info->csum_root,
11371                                              init_extent_tree);
11372                         if (ret) {
11373                                 fprintf(stderr, "crc refilling failed\n");
11374                                 return -EIO;
11375                         }
11376                 }
11377                 /*
11378                  * Ok now we commit and run the normal fsck, which will add
11379                  * extent entries for all of the items it finds.
11380                  */
11381                 ret = btrfs_commit_transaction(trans, info->extent_root);
11382                 if (ret)
11383                         goto close_out;
11384         }
11385         if (!extent_buffer_uptodate(info->extent_root->node)) {
11386                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11387                 ret = -EIO;
11388                 goto close_out;
11389         }
11390         if (!extent_buffer_uptodate(info->csum_root->node)) {
11391                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11392                 ret = -EIO;
11393                 goto close_out;
11394         }
11395
11396         if (!ctx.progress_enabled)
11397                 fprintf(stderr, "checking extents\n");
11398         if (low_memory)
11399                 ret = check_chunks_and_extents_v2(root);
11400         else
11401                 ret = check_chunks_and_extents(root);
11402         if (ret)
11403                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11404
11405         ret = repair_root_items(info);
11406         if (ret < 0)
11407                 goto close_out;
11408         if (repair) {
11409                 fprintf(stderr, "Fixed %d roots.\n", ret);
11410                 ret = 0;
11411         } else if (ret > 0) {
11412                 fprintf(stderr,
11413                        "Found %d roots with an outdated root item.\n",
11414                        ret);
11415                 fprintf(stderr,
11416                         "Please run a filesystem check with the option --repair to fix them.\n");
11417                 ret = 1;
11418                 goto close_out;
11419         }
11420
11421         if (!ctx.progress_enabled) {
11422                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11423                         fprintf(stderr, "checking free space tree\n");
11424                 else
11425                         fprintf(stderr, "checking free space cache\n");
11426         }
11427         ret = check_space_cache(root);
11428         if (ret)
11429                 goto out;
11430
11431         /*
11432          * We used to have to have these hole extents in between our real
11433          * extents so if we don't have this flag set we need to make sure there
11434          * are no gaps in the file extents for inodes, otherwise we can just
11435          * ignore it when this happens.
11436          */
11437         no_holes = btrfs_fs_incompat(root->fs_info,
11438                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11439         if (!ctx.progress_enabled)
11440                 fprintf(stderr, "checking fs roots\n");
11441         ret = check_fs_roots(root, &root_cache);
11442         if (ret)
11443                 goto out;
11444
11445         fprintf(stderr, "checking csums\n");
11446         ret = check_csums(root);
11447         if (ret)
11448                 goto out;
11449
11450         fprintf(stderr, "checking root refs\n");
11451         ret = check_root_refs(root, &root_cache);
11452         if (ret)
11453                 goto out;
11454
11455         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11456                 struct extent_buffer *eb;
11457
11458                 eb = list_first_entry(&root->fs_info->recow_ebs,
11459                                       struct extent_buffer, recow);
11460                 list_del_init(&eb->recow);
11461                 ret = recow_extent_buffer(root, eb);
11462                 if (ret)
11463                         break;
11464         }
11465
11466         while (!list_empty(&delete_items)) {
11467                 struct bad_item *bad;
11468
11469                 bad = list_first_entry(&delete_items, struct bad_item, list);
11470                 list_del_init(&bad->list);
11471                 if (repair)
11472                         ret = delete_bad_item(root, bad);
11473                 free(bad);
11474         }
11475
11476         if (info->quota_enabled) {
11477                 int err;
11478                 fprintf(stderr, "checking quota groups\n");
11479                 err = qgroup_verify_all(info);
11480                 if (err)
11481                         goto out;
11482                 report_qgroups(0);
11483                 err = repair_qgroups(info, &qgroups_repaired);
11484                 if (err)
11485                         goto out;
11486         }
11487
11488         if (!list_empty(&root->fs_info->recow_ebs)) {
11489                 fprintf(stderr, "Transid errors in file system\n");
11490                 ret = 1;
11491         }
11492 out:
11493         /* Don't override original ret */
11494         if (!ret && qgroups_repaired)
11495                 ret = qgroups_repaired;
11496
11497         if (found_old_backref) { /*
11498                  * there was a disk format change when mixed
11499                  * backref was in testing tree. The old format
11500                  * existed about one week.
11501                  */
11502                 printf("\n * Found old mixed backref format. "
11503                        "The old format is not supported! *"
11504                        "\n * Please mount the FS in readonly mode, "
11505                        "backup data and re-format the FS. *\n\n");
11506                 ret = 1;
11507         }
11508         printf("found %llu bytes used err is %d\n",
11509                (unsigned long long)bytes_used, ret);
11510         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11511         printf("total tree bytes: %llu\n",
11512                (unsigned long long)total_btree_bytes);
11513         printf("total fs tree bytes: %llu\n",
11514                (unsigned long long)total_fs_tree_bytes);
11515         printf("total extent tree bytes: %llu\n",
11516                (unsigned long long)total_extent_tree_bytes);
11517         printf("btree space waste bytes: %llu\n",
11518                (unsigned long long)btree_space_waste);
11519         printf("file data blocks allocated: %llu\n referenced %llu\n",
11520                 (unsigned long long)data_bytes_allocated,
11521                 (unsigned long long)data_bytes_referenced);
11522
11523         free_qgroup_counts();
11524         free_root_recs_tree(&root_cache);
11525 close_out:
11526         close_ctree(root);
11527 err_out:
11528         if (ctx.progress_enabled)
11529                 task_deinit(ctx.info);
11530
11531         return ret;
11532 }