5cf0c89f66dff3bf90ce08cb4ae02ed95abe4c73
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phase of the check that is currently running.  print_status_check() uses
 * the value as an index into its table of progress messages and returns
 * without printing anything when it is TASK_NOTHING.
 */
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
/* Context handed (as void *) to the progress callback print_status_check(). */
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp; /* selects the progress message to show */
55
56         struct task_info *info; /* passed to task_period_start()/task_period_wait() */
57 };
58
/*
 * File-scope state of the checker.
 * NOTE(review): the code that updates and reports these counters, lists and
 * option flags lies outside this part of the file - confirm their exact
 * meaning there before relying on the names alone.
 */
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
/*
 * Common header of both backref flavours: it is embedded as the member
 * "node" of struct data_backref and struct tree_backref.  is_data tells
 * compare_extent_backref() which of the two flavours a header belongs to.
 */
77 struct extent_backref {
78         struct rb_node node; /* rb-tree linkage, see rb_node_to_extent_backref() */
79         unsigned int is_data:1;
80         unsigned int found_extent_tree:1;
81         unsigned int full_backref:1;
82         unsigned int found_ref:1;
83         unsigned int broken:1;
84 };
85
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
87 {
88         return rb_entry(node, struct extent_backref, node);
89 }
90
/*
 * Backref of a data extent.
 *
 * compare_data_backref() orders these by parent/root first, then (unless
 * node.full_backref is set) by owner, offset and bytes, and finally - only
 * when both sides have a non-zero found_ref - by disk_bytenr and found_ref.
 */
91 struct data_backref {
92         struct extent_backref node; /* common header, see to_data_backref() */
93         union {
94                 u64 parent;
95                 u64 root;
96         };
97         u64 owner;
98         u64 offset;
99         u64 disk_bytenr;
100         u64 bytes;
101         u64 ram_bytes;
102         u32 num_refs;
103         u32 found_ref; /* a counter, distinct from the 1-bit node.found_ref */
104 };
105
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
107 {
108         return container_of(back, struct data_backref, node);
109 }
110
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
112 {
113         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115         struct data_backref *back1 = to_data_backref(ext1);
116         struct data_backref *back2 = to_data_backref(ext2);
117
118         WARN_ON(!ext1->is_data);
119         WARN_ON(!ext2->is_data);
120
121         /* parent and root are a union, so this covers both */
122         if (back1->parent > back2->parent)
123                 return 1;
124         if (back1->parent < back2->parent)
125                 return -1;
126
127         /* This is a full backref and the parents match. */
128         if (back1->node.full_backref)
129                 return 0;
130
131         if (back1->owner > back2->owner)
132                 return 1;
133         if (back1->owner < back2->owner)
134                 return -1;
135
136         if (back1->offset > back2->offset)
137                 return 1;
138         if (back1->offset < back2->offset)
139                 return -1;
140
141         if (back1->bytes > back2->bytes)
142                 return 1;
143         if (back1->bytes < back2->bytes)
144                 return -1;
145
146         if (back1->found_ref && back2->found_ref) {
147                 if (back1->disk_bytenr > back2->disk_bytenr)
148                         return 1;
149                 if (back1->disk_bytenr < back2->disk_bytenr)
150                         return -1;
151
152                 if (back1->found_ref > back2->found_ref)
153                         return 1;
154                 if (back1->found_ref < back2->found_ref)
155                         return -1;
156         }
157
158         return 0;
159 }
160
161 /*
162  * Much like data_backref, just removed the undetermined members
163  * and change it to use list_head.
164  * During extent scan, it is stored in root->orphan_data_extent.
165  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
166  */
167 struct orphan_data_extent {
168         struct list_head list;
169         u64 root;
170         u64 objectid; /* reported as "inode" by print_orphan_data_extents() */
171         u64 offset;
172         u64 disk_bytenr;
173         u64 disk_len;
174 };
175
/*
 * Backref of a tree block.  compare_tree_backref() orders these solely by
 * the parent/root value.
 */
176 struct tree_backref {
177         struct extent_backref node; /* common header, see to_tree_backref() */
178         union {
179                 u64 parent;
180                 u64 root;
181         };
182 };
183
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
185 {
186         return container_of(back, struct tree_backref, node);
187 }
188
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
190 {
191         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193         struct tree_backref *back1 = to_tree_backref(ext1);
194         struct tree_backref *back2 = to_tree_backref(ext2);
195
196         WARN_ON(ext1->is_data);
197         WARN_ON(ext2->is_data);
198
199         /* parent and root are a union, so this covers both */
200         if (back1->parent > back2->parent)
201                 return 1;
202         if (back1->parent < back2->parent)
203                 return -1;
204
205         return 0;
206 }
207
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
209 {
210         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
212
213         if (ext1->is_data > ext2->is_data)
214                 return 1;
215
216         if (ext1->is_data < ext2->is_data)
217                 return -1;
218
219         if (ext1->full_backref > ext2->full_backref)
220                 return 1;
221         if (ext1->full_backref < ext2->full_backref)
222                 return -1;
223
224         if (ext1->is_data)
225                 return compare_data_backref(node1, node2);
226         else
227                 return compare_tree_backref(node1, node2);
228 }
229
230 /* Explicit initialization for extent_record::flag_block_full_backref */
/* The value 2 needs two bits, which is why that bitfield is declared ":2". */
231 enum { FLAG_UNSET = 2 };
232
/*
 * Bookkeeping record for one extent.
 * NOTE(review): the code that fills and evaluates these fields lies outside
 * this part of the file; backref_tree presumably holds extent_backref nodes
 * ordered by compare_extent_backref() - TODO confirm.
 */
233 struct extent_record {
234         struct list_head backrefs;
235         struct list_head dups;
236         struct rb_root backref_tree;
237         struct list_head list; /* linkage used by to_extent_record() */
238         struct cache_extent cache;
239         struct btrfs_disk_key parent_key;
240         u64 start;
241         u64 max_size;
242         u64 nr;
243         u64 refs;
244         u64 extent_item_refs;
245         u64 generation;
246         u64 parent_generation;
247         u64 info_objectid;
248         u32 num_duplicates;
249         u8 info_level;
250         unsigned int flag_block_full_backref:2; /* two bits so it can hold FLAG_UNSET */
251         unsigned int found_rec:1;
252         unsigned int content_checked:1;
253         unsigned int owner_ref_checked:1;
254         unsigned int is_root:1;
255         unsigned int metadata:1;
256         unsigned int bad_full_backref:1;
257         unsigned int crossing_stripes:1;
258         unsigned int wrong_chunk_type:1;
259 };
260
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
262 {
263         return container_of(entry, struct extent_record, list);
264 }
265
/*
 * One name under which an inode is referenced; kept on
 * inode_record::backrefs.
 *
 * The structure is variable-length: the name is stored right behind it, and
 * clone_inode_rec() copies sizeof(struct) + namelen + 1 bytes per entry.
 */
266 struct inode_backref {
267         struct list_head list;
268         unsigned int found_dir_item:1;
269         unsigned int found_dir_index:1;
270         unsigned int found_inode_ref:1;
271         unsigned int filetype:8;
272         int errors; /* presumably REF_ERR_* bits - TODO confirm against the setters */
273         unsigned int ref_type;
274         u64 dir;
275         u64 index;
276         u16 namelen;
277         char name[0];
278 };
279
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
281 {
282         return list_entry(entry, struct inode_backref, list);
283 }
284
/*
 * Summary of one root item.
 * NOTE(review): producers and consumers of this record are outside this
 * part of the file; field meanings are inferred from their names only.
 */
285 struct root_item_record {
286         struct list_head list;
287         u64 objectid;
288         u64 bytenr;
289         u64 last_snapshot;
290         u8 level;
291         u8 drop_level;
292         int level_size;
293         struct btrfs_key drop_key;
294 };
295
/*
 * Reference error bits.  print_ref_error() translates a mask of these into
 * human readable text.
 */
296 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
298 #define REF_ERR_NO_INODE_REF            (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
301 #define REF_ERR_DUP_INODE_REF           (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
305 #define REF_ERR_NO_ROOT_REF             (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
309
/*
 * A hole in a file, covering the byte range [start, start + len).
 * Holes are kept in an rb-tree (inode_record::holes) that is maintained by
 * add_file_extent_hole() and del_file_extent_hole().
 */
310 struct file_extent_hole {
311         struct rb_node node;
312         u64 start;
313         u64 len;
314 };
315
/*
 * Everything collected about one inode while walking a tree.
 *
 * Records are looked up and created by get_inode_rec() and can be shared:
 * when a caller asks for a modifiable record whose refs is above 1,
 * get_inode_rec() hands out a private copy made by clone_inode_rec().
 */
316 struct inode_record {
317         struct list_head backrefs; /* list of struct inode_backref */
318         unsigned int checked:1;
319         unsigned int merging:1;
320         unsigned int found_inode_item:1;
321         unsigned int found_dir_item:1;
322         unsigned int found_file_extent:1;
323         unsigned int found_csum_item:1;
324         unsigned int some_csum_missing:1;
325         unsigned int nodatasum:1;
326         int errors; /* I_ERR_* bits, decoded by print_inode_error() */
327
328         u64 ino;
329         u32 nlink;
330         u32 imode;
331         u64 isize;
332         u64 nbytes;
333
334         u32 found_link;
335         u64 found_size;
336         u64 extent_start; /* get_inode_rec() initializes this to (u64)-1 */
337         u64 extent_end;
338         struct rb_root holes; /* rb-tree of struct file_extent_hole */
339         struct list_head orphan_extents; /* list of struct orphan_data_extent */
340
341         u32 refs; /* number of holders sharing this record */
342 };
343
/*
 * Inode error bits stored in inode_record::errors.  print_inode_error()
 * translates a mask of these into human readable text.
 */
344 #define I_ERR_NO_INODE_ITEM             (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
353 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
355 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
359
/*
 * One reference to a root, kept on root_record::backrefs presumably -
 * TODO confirm; the code that uses this record is outside this part of the
 * file.  Like inode_backref it ends in a zero-length name[] array.
 */
360 struct root_backref {
361         struct list_head list; /* linkage used by to_root_backref() */
362         unsigned int found_dir_item:1;
363         unsigned int found_dir_index:1;
364         unsigned int found_back_ref:1;
365         unsigned int found_forward_ref:1;
366         unsigned int reachable:1;
367         int errors;
368         u64 ref_root;
369         u64 dir;
370         u64 index;
371         u16 namelen;
372         char name[0];
373 };
374
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
376 {
377         return list_entry(entry, struct root_backref, list);
378 }
379
/*
 * Record of one root.
 * NOTE(review): its users are outside this part of the file; the embedded
 * cache_extent suggests it is stored in a cache_tree - TODO confirm.
 */
380 struct root_record {
381         struct list_head backrefs;
382         struct cache_extent cache;
383         unsigned int found_root_item:1;
384         u64 objectid;
385         u32 found_ref;
386 };
387
/*
 * cache_tree entry that carries an opaque pointer.  get_inode_rec() stores
 * inode records this way: cache.start is the inode number, cache.size is 1
 * and data points to the struct inode_record.
 */
388 struct ptr_node {
389         struct cache_extent cache;
390         void *data;
391 };
392
/*
 * Per-node caches of root and inode records.
 * NOTE(review): the tree-walk code that uses this is outside this part of
 * the file; the description is based on the member names only.
 */
393 struct shared_node {
394         struct cache_extent cache;
395         struct cache_tree root_cache;
396         struct cache_tree inode_cache;
397         struct inode_record *current;
398         u32 refs;
399 };
400
/* A block described by its start and size (users not visible in this part of the file). */
401 struct block_info {
402         u64 start;
403         u32 size;
404 };
405
/*
 * State of a tree walk, with one shared_node slot per possible tree level.
 * NOTE(review): the walk itself is outside this part of the file.
 */
406 struct walk_control {
407         struct cache_tree shared;
408         struct shared_node *nodes[BTRFS_MAX_LEVEL];
409         int active_node;
410         int root_level;
411 };
412
/* List entry naming an item by root id and key (users not visible in this part of the file). */
413 struct bad_item {
414         struct btrfs_key key;
415         u64 root_id;
416         struct list_head list;
417 };
418
/* List entry describing a byte range with counters (users not visible in this part of the file). */
419 struct extent_entry {
420         u64 bytenr;
421         u64 bytes;
422         int count;
423         int broken;
424         struct list_head list;
425 };
426
/*
 * Information gathered about a root; embeds a cache_extent, so presumably
 * kept in a cache_tree such as roots_info_cache - TODO confirm.
 */
427 struct root_item_info {
428         /* level of the root */
429         u8 level;
430         /* number of nodes at this level, must be 1 for a root */
431         int node_count;
432         u64 bytenr;
433         u64 gen;
434         struct cache_extent cache_extent;
435 };
436
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about them yet.  They are only used internally
 * for error classification.
 *
 * Every flag owns a distinct bit, so flags that were OR-ed into one error
 * word can be told apart again.  CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH; it now uses the next free bit so that all
 * other values stay as they were.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define CROSSING_STRIPE_BOUNDARY (1 << 7) /* For kernel scrub workaround */
451
452 static void *print_status_check(void *p)
453 {
454         struct task_ctx *priv = p;
455         const char work_indicator[] = { '.', 'o', 'O', 'o' };
456         uint32_t count = 0;
457         static char *task_position_string[] = {
458                 "checking extents",
459                 "checking free space cache",
460                 "checking fs roots",
461         };
462
463         task_period_start(priv->info, 1000 /* 1s */);
464
465         if (priv->tp == TASK_NOTHING)
466                 return NULL;
467
468         while (1) {
469                 printf("%s [%c]\r", task_position_string[priv->tp],
470                                 work_indicator[count % 4]);
471                 count++;
472                 fflush(stdout);
473                 task_period_wait(priv->info);
474         }
475         return NULL;
476 }
477
/*
 * Counterpart of print_status_check(): ends the progress line with a
 * newline and flushes stdout.  @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);
	return 0;
}
485
486 /* Compatible function to allow reuse of old codes */
487 static u64 first_extent_gap(struct rb_root *holes)
488 {
489         struct file_extent_hole *hole;
490
491         if (RB_EMPTY_ROOT(holes))
492                 return (u64)-1;
493
494         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
495         return hole->start;
496 }
497
498 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
499 {
500         struct file_extent_hole *hole1;
501         struct file_extent_hole *hole2;
502
503         hole1 = rb_entry(node1, struct file_extent_hole, node);
504         hole2 = rb_entry(node2, struct file_extent_hole, node);
505
506         if (hole1->start > hole2->start)
507                 return -1;
508         if (hole1->start < hole2->start)
509                 return 1;
510         /* Now hole1->start == hole2->start */
511         if (hole1->len >= hole2->len)
512                 /*
513                  * Hole 1 will be merge center
514                  * Same hole will be merged later
515                  */
516                 return -1;
517         /* Hole 2 will be merge center */
518         return 1;
519 }
520
521 /*
522  * Add a hole to the record
523  *
524  * This will do hole merge for copy_file_extent_holes(),
525  * which will ensure there won't be continuous holes.
526  */
527 static int add_file_extent_hole(struct rb_root *holes,
528                                 u64 start, u64 len)
529 {
530         struct file_extent_hole *hole;
531         struct file_extent_hole *prev = NULL;
532         struct file_extent_hole *next = NULL;
533
534         hole = malloc(sizeof(*hole));
535         if (!hole)
536                 return -ENOMEM;
537         hole->start = start;
538         hole->len = len;
539         /* Since compare will not return 0, no -EEXIST will happen */
540         rb_insert(holes, &hole->node, compare_hole);
541
542         /* simple merge with previous hole */
543         if (rb_prev(&hole->node))
544                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
545                                 node);
546         if (prev && prev->start + prev->len >= hole->start) {
547                 hole->len = hole->start + hole->len - prev->start;
548                 hole->start = prev->start;
549                 rb_erase(&prev->node, holes);
550                 free(prev);
551                 prev = NULL;
552         }
553
554         /* iterate merge with next holes */
555         while (1) {
556                 if (!rb_next(&hole->node))
557                         break;
558                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
559                                         node);
560                 if (hole->start + hole->len >= next->start) {
561                         if (hole->start + hole->len <= next->start + next->len)
562                                 hole->len = next->start + next->len -
563                                             hole->start;
564                         rb_erase(&next->node, holes);
565                         free(next);
566                         next = NULL;
567                 } else
568                         break;
569         }
570         return 0;
571 }
572
573 static int compare_hole_range(struct rb_node *node, void *data)
574 {
575         struct file_extent_hole *hole;
576         u64 start;
577
578         hole = (struct file_extent_hole *)data;
579         start = hole->start;
580
581         hole = rb_entry(node, struct file_extent_hole, node);
582         if (start < hole->start)
583                 return -1;
584         if (start >= hole->start && start < hole->start + hole->len)
585                 return 0;
586         return 1;
587 }
588
589 /*
590  * Delete a hole in the record
591  *
592  * This will do the hole split and is much restrict than add.
593  */
594 static int del_file_extent_hole(struct rb_root *holes,
595                                 u64 start, u64 len)
596 {
597         struct file_extent_hole *hole;
598         struct file_extent_hole tmp;
599         u64 prev_start = 0;
600         u64 prev_len = 0;
601         u64 next_start = 0;
602         u64 next_len = 0;
603         struct rb_node *node;
604         int have_prev = 0;
605         int have_next = 0;
606         int ret = 0;
607
608         tmp.start = start;
609         tmp.len = len;
610         node = rb_search(holes, &tmp, compare_hole_range, NULL);
611         if (!node)
612                 return -EEXIST;
613         hole = rb_entry(node, struct file_extent_hole, node);
614         if (start + len > hole->start + hole->len)
615                 return -EEXIST;
616
617         /*
618          * Now there will be no overlap, delete the hole and re-add the
619          * split(s) if they exists.
620          */
621         if (start > hole->start) {
622                 prev_start = hole->start;
623                 prev_len = start - hole->start;
624                 have_prev = 1;
625         }
626         if (hole->start + hole->len > start + len) {
627                 next_start = start + len;
628                 next_len = hole->start + hole->len - start - len;
629                 have_next = 1;
630         }
631         rb_erase(node, holes);
632         free(hole);
633         if (have_prev) {
634                 ret = add_file_extent_hole(holes, prev_start, prev_len);
635                 if (ret < 0)
636                         return ret;
637         }
638         if (have_next) {
639                 ret = add_file_extent_hole(holes, next_start, next_len);
640                 if (ret < 0)
641                         return ret;
642         }
643         return 0;
644 }
645
646 static int copy_file_extent_holes(struct rb_root *dst,
647                                   struct rb_root *src)
648 {
649         struct file_extent_hole *hole;
650         struct rb_node *node;
651         int ret = 0;
652
653         node = rb_first(src);
654         while (node) {
655                 hole = rb_entry(node, struct file_extent_hole, node);
656                 ret = add_file_extent_hole(dst, hole->start, hole->len);
657                 if (ret)
658                         break;
659                 node = rb_next(node);
660         }
661         return ret;
662 }
663
664 static void free_file_extent_holes(struct rb_root *holes)
665 {
666         struct rb_node *node;
667         struct file_extent_hole *hole;
668
669         node = rb_first(holes);
670         while (node) {
671                 hole = rb_entry(node, struct file_extent_hole, node);
672                 rb_erase(node, holes);
673                 free(hole);
674                 node = rb_first(holes);
675         }
676 }
677
/* Forward declaration; the definition is not within this part of the file. */
678 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
679
680 static void record_root_in_trans(struct btrfs_trans_handle *trans,
681                                  struct btrfs_root *root)
682 {
683         if (root->last_trans != trans->transid) {
684                 root->track_dirty = 1;
685                 root->last_trans = trans->transid;
686                 root->commit_root = root->node;
687                 extent_buffer_get(root->node);
688         }
689 }
690
691 static u8 imode_to_type(u32 imode)
692 {
693 #define S_SHIFT 12
694         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
695                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
696                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
697                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
698                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
699                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
700                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
701                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
702         };
703
704         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
705 #undef S_SHIFT
706 }
707
708 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
709 {
710         struct device_record *rec1;
711         struct device_record *rec2;
712
713         rec1 = rb_entry(node1, struct device_record, node);
714         rec2 = rb_entry(node2, struct device_record, node);
715         if (rec1->devid > rec2->devid)
716                 return -1;
717         else if (rec1->devid < rec2->devid)
718                 return 1;
719         else
720                 return 0;
721 }
722
723 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
724 {
725         struct inode_record *rec;
726         struct inode_backref *backref;
727         struct inode_backref *orig;
728         struct inode_backref *tmp;
729         struct orphan_data_extent *src_orphan;
730         struct orphan_data_extent *dst_orphan;
731         size_t size;
732         int ret;
733
734         rec = malloc(sizeof(*rec));
735         if (!rec)
736                 return ERR_PTR(-ENOMEM);
737         memcpy(rec, orig_rec, sizeof(*rec));
738         rec->refs = 1;
739         INIT_LIST_HEAD(&rec->backrefs);
740         INIT_LIST_HEAD(&rec->orphan_extents);
741         rec->holes = RB_ROOT;
742
743         list_for_each_entry(orig, &orig_rec->backrefs, list) {
744                 size = sizeof(*orig) + orig->namelen + 1;
745                 backref = malloc(size);
746                 if (!backref) {
747                         ret = -ENOMEM;
748                         goto cleanup;
749                 }
750                 memcpy(backref, orig, size);
751                 list_add_tail(&backref->list, &rec->backrefs);
752         }
753         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
754                 dst_orphan = malloc(sizeof(*dst_orphan));
755                 if (!dst_orphan) {
756                         ret = -ENOMEM;
757                         goto cleanup;
758                 }
759                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
760                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
761         }
762         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
763         BUG_ON(ret < 0);
764
765         return rec;
766
767 cleanup:
768         if (!list_empty(&rec->backrefs))
769                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
770                         list_del(&orig->list);
771                         free(orig);
772                 }
773
774         if (!list_empty(&rec->orphan_extents))
775                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
776                         list_del(&orig->list);
777                         free(orig);
778                 }
779
780         free(rec);
781
782         return ERR_PTR(ret);
783 }
784
785 static void print_orphan_data_extents(struct list_head *orphan_extents,
786                                       u64 objectid)
787 {
788         struct orphan_data_extent *orphan;
789
790         if (list_empty(orphan_extents))
791                 return;
792         printf("The following data extent is lost in tree %llu:\n",
793                objectid);
794         list_for_each_entry(orphan, orphan_extents, list) {
795                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
796                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
797                        orphan->disk_len);
798         }
799 }
800
801 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
802 {
803         u64 root_objectid = root->root_key.objectid;
804         int errors = rec->errors;
805
806         if (!errors)
807                 return;
808         /* reloc root errors, we print its corresponding fs root objectid*/
809         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
810                 root_objectid = root->root_key.offset;
811                 fprintf(stderr, "reloc");
812         }
813         fprintf(stderr, "root %llu inode %llu errors %x",
814                 (unsigned long long) root_objectid,
815                 (unsigned long long) rec->ino, rec->errors);
816
817         if (errors & I_ERR_NO_INODE_ITEM)
818                 fprintf(stderr, ", no inode item");
819         if (errors & I_ERR_NO_ORPHAN_ITEM)
820                 fprintf(stderr, ", no orphan item");
821         if (errors & I_ERR_DUP_INODE_ITEM)
822                 fprintf(stderr, ", dup inode item");
823         if (errors & I_ERR_DUP_DIR_INDEX)
824                 fprintf(stderr, ", dup dir index");
825         if (errors & I_ERR_ODD_DIR_ITEM)
826                 fprintf(stderr, ", odd dir item");
827         if (errors & I_ERR_ODD_FILE_EXTENT)
828                 fprintf(stderr, ", odd file extent");
829         if (errors & I_ERR_BAD_FILE_EXTENT)
830                 fprintf(stderr, ", bad file extent");
831         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
832                 fprintf(stderr, ", file extent overlap");
833         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
834                 fprintf(stderr, ", file extent discount");
835         if (errors & I_ERR_DIR_ISIZE_WRONG)
836                 fprintf(stderr, ", dir isize wrong");
837         if (errors & I_ERR_FILE_NBYTES_WRONG)
838                 fprintf(stderr, ", nbytes wrong");
839         if (errors & I_ERR_ODD_CSUM_ITEM)
840                 fprintf(stderr, ", odd csum item");
841         if (errors & I_ERR_SOME_CSUM_MISSING)
842                 fprintf(stderr, ", some csum missing");
843         if (errors & I_ERR_LINK_COUNT_WRONG)
844                 fprintf(stderr, ", link count wrong");
845         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
846                 fprintf(stderr, ", orphan file extent");
847         fprintf(stderr, "\n");
848         /* Print the orphan extents if needed */
849         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
850                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
851
852         /* Print the holes if needed */
853         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
854                 struct file_extent_hole *hole;
855                 struct rb_node *node;
856                 int found = 0;
857
858                 node = rb_first(&rec->holes);
859                 fprintf(stderr, "Found file extent holes:\n");
860                 while (node) {
861                         found = 1;
862                         hole = rb_entry(node, struct file_extent_hole, node);
863                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
864                                 hole->start, hole->len);
865                         node = rb_next(node);
866                 }
867                 if (!found)
868                         fprintf(stderr, "\tstart: 0, len: %llu\n",
869                                 round_up(rec->isize, root->sectorsize));
870         }
871 }
872
873 static void print_ref_error(int errors)
874 {
875         if (errors & REF_ERR_NO_DIR_ITEM)
876                 fprintf(stderr, ", no dir item");
877         if (errors & REF_ERR_NO_DIR_INDEX)
878                 fprintf(stderr, ", no dir index");
879         if (errors & REF_ERR_NO_INODE_REF)
880                 fprintf(stderr, ", no inode ref");
881         if (errors & REF_ERR_DUP_DIR_ITEM)
882                 fprintf(stderr, ", dup dir item");
883         if (errors & REF_ERR_DUP_DIR_INDEX)
884                 fprintf(stderr, ", dup dir index");
885         if (errors & REF_ERR_DUP_INODE_REF)
886                 fprintf(stderr, ", dup inode ref");
887         if (errors & REF_ERR_INDEX_UNMATCH)
888                 fprintf(stderr, ", index mismatch");
889         if (errors & REF_ERR_FILETYPE_UNMATCH)
890                 fprintf(stderr, ", filetype mismatch");
891         if (errors & REF_ERR_NAME_TOO_LONG)
892                 fprintf(stderr, ", name too long");
893         if (errors & REF_ERR_NO_ROOT_REF)
894                 fprintf(stderr, ", no root ref");
895         if (errors & REF_ERR_NO_ROOT_BACKREF)
896                 fprintf(stderr, ", no root backref");
897         if (errors & REF_ERR_DUP_ROOT_REF)
898                 fprintf(stderr, ", dup root ref");
899         if (errors & REF_ERR_DUP_ROOT_BACKREF)
900                 fprintf(stderr, ", dup root backref");
901         fprintf(stderr, "\n");
902 }
903
904 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
905                                           u64 ino, int mod)
906 {
907         struct ptr_node *node;
908         struct cache_extent *cache;
909         struct inode_record *rec = NULL;
910         int ret;
911
912         cache = lookup_cache_extent(inode_cache, ino, 1);
913         if (cache) {
914                 node = container_of(cache, struct ptr_node, cache);
915                 rec = node->data;
916                 if (mod && rec->refs > 1) {
917                         node->data = clone_inode_rec(rec);
918                         if (IS_ERR(node->data))
919                                 return node->data;
920                         rec->refs--;
921                         rec = node->data;
922                 }
923         } else if (mod) {
924                 rec = calloc(1, sizeof(*rec));
925                 if (!rec)
926                         return ERR_PTR(-ENOMEM);
927                 rec->ino = ino;
928                 rec->extent_start = (u64)-1;
929                 rec->refs = 1;
930                 INIT_LIST_HEAD(&rec->backrefs);
931                 INIT_LIST_HEAD(&rec->orphan_extents);
932                 rec->holes = RB_ROOT;
933
934                 node = malloc(sizeof(*node));
935                 if (!node) {
936                         free(rec);
937                         return ERR_PTR(-ENOMEM);
938                 }
939                 node->cache.start = ino;
940                 node->cache.size = 1;
941                 node->data = rec;
942
943                 if (ino == BTRFS_FREE_INO_OBJECTID)
944                         rec->found_link = 1;
945
946                 ret = insert_cache_extent(inode_cache, &node->cache);
947                 if (ret)
948                         return ERR_PTR(-EEXIST);
949         }
950         return rec;
951 }
952
953 static void free_orphan_data_extents(struct list_head *orphan_extents)
954 {
955         struct orphan_data_extent *orphan;
956
957         while (!list_empty(orphan_extents)) {
958                 orphan = list_entry(orphan_extents->next,
959                                     struct orphan_data_extent, list);
960                 list_del(&orphan->list);
961                 free(orphan);
962         }
963 }
964
965 static void free_inode_rec(struct inode_record *rec)
966 {
967         struct inode_backref *backref;
968
969         if (--rec->refs > 0)
970                 return;
971
972         while (!list_empty(&rec->backrefs)) {
973                 backref = to_inode_backref(rec->backrefs.next);
974                 list_del(&backref->list);
975                 free(backref);
976         }
977         free_orphan_data_extents(&rec->orphan_extents);
978         free_file_extent_holes(&rec->holes);
979         free(rec);
980 }
981
982 static int can_free_inode_rec(struct inode_record *rec)
983 {
984         if (!rec->errors && rec->checked && rec->found_inode_item &&
985             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
986                 return 1;
987         return 0;
988 }
989
990 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
991                                  struct inode_record *rec)
992 {
993         struct cache_extent *cache;
994         struct inode_backref *tmp, *backref;
995         struct ptr_node *node;
996         unsigned char filetype;
997
998         if (!rec->found_inode_item)
999                 return;
1000
1001         filetype = imode_to_type(rec->imode);
1002         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1003                 if (backref->found_dir_item && backref->found_dir_index) {
1004                         if (backref->filetype != filetype)
1005                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1006                         if (!backref->errors && backref->found_inode_ref &&
1007                             rec->nlink == rec->found_link) {
1008                                 list_del(&backref->list);
1009                                 free(backref);
1010                         }
1011                 }
1012         }
1013
1014         if (!rec->checked || rec->merging)
1015                 return;
1016
1017         if (S_ISDIR(rec->imode)) {
1018                 if (rec->found_size != rec->isize)
1019                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1020                 if (rec->found_file_extent)
1021                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1022         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1023                 if (rec->found_dir_item)
1024                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1025                 if (rec->found_size != rec->nbytes)
1026                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1027                 if (rec->nlink > 0 && !no_holes &&
1028                     (rec->extent_end < rec->isize ||
1029                      first_extent_gap(&rec->holes) < rec->isize))
1030                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1031         }
1032
1033         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1034                 if (rec->found_csum_item && rec->nodatasum)
1035                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1036                 if (rec->some_csum_missing && !rec->nodatasum)
1037                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1038         }
1039
1040         BUG_ON(rec->refs != 1);
1041         if (can_free_inode_rec(rec)) {
1042                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1043                 node = container_of(cache, struct ptr_node, cache);
1044                 BUG_ON(node->data != rec);
1045                 remove_cache_extent(inode_cache, &node->cache);
1046                 free(node);
1047                 free_inode_rec(rec);
1048         }
1049 }
1050
1051 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1052 {
1053         struct btrfs_path path;
1054         struct btrfs_key key;
1055         int ret;
1056
1057         key.objectid = BTRFS_ORPHAN_OBJECTID;
1058         key.type = BTRFS_ORPHAN_ITEM_KEY;
1059         key.offset = ino;
1060
1061         btrfs_init_path(&path);
1062         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1063         btrfs_release_path(&path);
1064         if (ret > 0)
1065                 ret = -ENOENT;
1066         return ret;
1067 }
1068
1069 static int process_inode_item(struct extent_buffer *eb,
1070                               int slot, struct btrfs_key *key,
1071                               struct shared_node *active_node)
1072 {
1073         struct inode_record *rec;
1074         struct btrfs_inode_item *item;
1075
1076         rec = active_node->current;
1077         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1078         if (rec->found_inode_item) {
1079                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1080                 return 1;
1081         }
1082         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1083         rec->nlink = btrfs_inode_nlink(eb, item);
1084         rec->isize = btrfs_inode_size(eb, item);
1085         rec->nbytes = btrfs_inode_nbytes(eb, item);
1086         rec->imode = btrfs_inode_mode(eb, item);
1087         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1088                 rec->nodatasum = 1;
1089         rec->found_inode_item = 1;
1090         if (rec->nlink == 0)
1091                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1092         maybe_free_inode_rec(&active_node->inode_cache, rec);
1093         return 0;
1094 }
1095
1096 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1097                                                 const char *name,
1098                                                 int namelen, u64 dir)
1099 {
1100         struct inode_backref *backref;
1101
1102         list_for_each_entry(backref, &rec->backrefs, list) {
1103                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1104                         break;
1105                 if (backref->dir != dir || backref->namelen != namelen)
1106                         continue;
1107                 if (memcmp(name, backref->name, namelen))
1108                         continue;
1109                 return backref;
1110         }
1111
1112         backref = malloc(sizeof(*backref) + namelen + 1);
1113         if (!backref)
1114                 return NULL;
1115         memset(backref, 0, sizeof(*backref));
1116         backref->dir = dir;
1117         backref->namelen = namelen;
1118         memcpy(backref->name, name, namelen);
1119         backref->name[namelen] = '\0';
1120         list_add_tail(&backref->list, &rec->backrefs);
1121         return backref;
1122 }
1123
1124 static int add_inode_backref(struct cache_tree *inode_cache,
1125                              u64 ino, u64 dir, u64 index,
1126                              const char *name, int namelen,
1127                              int filetype, int itemtype, int errors)
1128 {
1129         struct inode_record *rec;
1130         struct inode_backref *backref;
1131
1132         rec = get_inode_rec(inode_cache, ino, 1);
1133         BUG_ON(IS_ERR(rec));
1134         backref = get_inode_backref(rec, name, namelen, dir);
1135         BUG_ON(!backref);
1136         if (errors)
1137                 backref->errors |= errors;
1138         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1139                 if (backref->found_dir_index)
1140                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1141                 if (backref->found_inode_ref && backref->index != index)
1142                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1143                 if (backref->found_dir_item && backref->filetype != filetype)
1144                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1145
1146                 backref->index = index;
1147                 backref->filetype = filetype;
1148                 backref->found_dir_index = 1;
1149         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1150                 rec->found_link++;
1151                 if (backref->found_dir_item)
1152                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1153                 if (backref->found_dir_index && backref->filetype != filetype)
1154                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1155
1156                 backref->filetype = filetype;
1157                 backref->found_dir_item = 1;
1158         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1159                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1160                 if (backref->found_inode_ref)
1161                         backref->errors |= REF_ERR_DUP_INODE_REF;
1162                 if (backref->found_dir_index && backref->index != index)
1163                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1164                 else
1165                         backref->index = index;
1166
1167                 backref->ref_type = itemtype;
1168                 backref->found_inode_ref = 1;
1169         } else {
1170                 BUG_ON(1);
1171         }
1172
1173         maybe_free_inode_rec(inode_cache, rec);
1174         return 0;
1175 }
1176
1177 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1178                             struct cache_tree *dst_cache)
1179 {
1180         struct inode_backref *backref;
1181         u32 dir_count = 0;
1182         int ret = 0;
1183
1184         dst->merging = 1;
1185         list_for_each_entry(backref, &src->backrefs, list) {
1186                 if (backref->found_dir_index) {
1187                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1188                                         backref->index, backref->name,
1189                                         backref->namelen, backref->filetype,
1190                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1191                 }
1192                 if (backref->found_dir_item) {
1193                         dir_count++;
1194                         add_inode_backref(dst_cache, dst->ino,
1195                                         backref->dir, 0, backref->name,
1196                                         backref->namelen, backref->filetype,
1197                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1198                 }
1199                 if (backref->found_inode_ref) {
1200                         add_inode_backref(dst_cache, dst->ino,
1201                                         backref->dir, backref->index,
1202                                         backref->name, backref->namelen, 0,
1203                                         backref->ref_type, backref->errors);
1204                 }
1205         }
1206
1207         if (src->found_dir_item)
1208                 dst->found_dir_item = 1;
1209         if (src->found_file_extent)
1210                 dst->found_file_extent = 1;
1211         if (src->found_csum_item)
1212                 dst->found_csum_item = 1;
1213         if (src->some_csum_missing)
1214                 dst->some_csum_missing = 1;
1215         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1216                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1217                 if (ret < 0)
1218                         return ret;
1219         }
1220
1221         BUG_ON(src->found_link < dir_count);
1222         dst->found_link += src->found_link - dir_count;
1223         dst->found_size += src->found_size;
1224         if (src->extent_start != (u64)-1) {
1225                 if (dst->extent_start == (u64)-1) {
1226                         dst->extent_start = src->extent_start;
1227                         dst->extent_end = src->extent_end;
1228                 } else {
1229                         if (dst->extent_end > src->extent_start)
1230                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1231                         else if (dst->extent_end < src->extent_start) {
1232                                 ret = add_file_extent_hole(&dst->holes,
1233                                         dst->extent_end,
1234                                         src->extent_start - dst->extent_end);
1235                         }
1236                         if (dst->extent_end < src->extent_end)
1237                                 dst->extent_end = src->extent_end;
1238                 }
1239         }
1240
1241         dst->errors |= src->errors;
1242         if (src->found_inode_item) {
1243                 if (!dst->found_inode_item) {
1244                         dst->nlink = src->nlink;
1245                         dst->isize = src->isize;
1246                         dst->nbytes = src->nbytes;
1247                         dst->imode = src->imode;
1248                         dst->nodatasum = src->nodatasum;
1249                         dst->found_inode_item = 1;
1250                 } else {
1251                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1252                 }
1253         }
1254         dst->merging = 0;
1255
1256         return 0;
1257 }
1258
1259 static int splice_shared_node(struct shared_node *src_node,
1260                               struct shared_node *dst_node)
1261 {
1262         struct cache_extent *cache;
1263         struct ptr_node *node, *ins;
1264         struct cache_tree *src, *dst;
1265         struct inode_record *rec, *conflict;
1266         u64 current_ino = 0;
1267         int splice = 0;
1268         int ret;
1269
1270         if (--src_node->refs == 0)
1271                 splice = 1;
1272         if (src_node->current)
1273                 current_ino = src_node->current->ino;
1274
1275         src = &src_node->root_cache;
1276         dst = &dst_node->root_cache;
1277 again:
1278         cache = search_cache_extent(src, 0);
1279         while (cache) {
1280                 node = container_of(cache, struct ptr_node, cache);
1281                 rec = node->data;
1282                 cache = next_cache_extent(cache);
1283
1284                 if (splice) {
1285                         remove_cache_extent(src, &node->cache);
1286                         ins = node;
1287                 } else {
1288                         ins = malloc(sizeof(*ins));
1289                         BUG_ON(!ins);
1290                         ins->cache.start = node->cache.start;
1291                         ins->cache.size = node->cache.size;
1292                         ins->data = rec;
1293                         rec->refs++;
1294                 }
1295                 ret = insert_cache_extent(dst, &ins->cache);
1296                 if (ret == -EEXIST) {
1297                         conflict = get_inode_rec(dst, rec->ino, 1);
1298                         BUG_ON(IS_ERR(conflict));
1299                         merge_inode_recs(rec, conflict, dst);
1300                         if (rec->checked) {
1301                                 conflict->checked = 1;
1302                                 if (dst_node->current == conflict)
1303                                         dst_node->current = NULL;
1304                         }
1305                         maybe_free_inode_rec(dst, conflict);
1306                         free_inode_rec(rec);
1307                         free(ins);
1308                 } else {
1309                         BUG_ON(ret);
1310                 }
1311         }
1312
1313         if (src == &src_node->root_cache) {
1314                 src = &src_node->inode_cache;
1315                 dst = &dst_node->inode_cache;
1316                 goto again;
1317         }
1318
1319         if (current_ino > 0 && (!dst_node->current ||
1320             current_ino > dst_node->current->ino)) {
1321                 if (dst_node->current) {
1322                         dst_node->current->checked = 1;
1323                         maybe_free_inode_rec(dst, dst_node->current);
1324                 }
1325                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1326                 BUG_ON(IS_ERR(dst_node->current));
1327         }
1328         return 0;
1329 }
1330
1331 static void free_inode_ptr(struct cache_extent *cache)
1332 {
1333         struct ptr_node *node;
1334         struct inode_record *rec;
1335
1336         node = container_of(cache, struct ptr_node, cache);
1337         rec = node->data;
1338         free_inode_rec(rec);
1339         free(node);
1340 }
1341
1342 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1343
1344 static struct shared_node *find_shared_node(struct cache_tree *shared,
1345                                             u64 bytenr)
1346 {
1347         struct cache_extent *cache;
1348         struct shared_node *node;
1349
1350         cache = lookup_cache_extent(shared, bytenr, 1);
1351         if (cache) {
1352                 node = container_of(cache, struct shared_node, cache);
1353                 return node;
1354         }
1355         return NULL;
1356 }
1357
1358 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1359 {
1360         int ret;
1361         struct shared_node *node;
1362
1363         node = calloc(1, sizeof(*node));
1364         if (!node)
1365                 return -ENOMEM;
1366         node->cache.start = bytenr;
1367         node->cache.size = 1;
1368         cache_tree_init(&node->root_cache);
1369         cache_tree_init(&node->inode_cache);
1370         node->refs = refs;
1371
1372         ret = insert_cache_extent(shared, &node->cache);
1373
1374         return ret;
1375 }
1376
1377 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1378                              struct walk_control *wc, int level)
1379 {
1380         struct shared_node *node;
1381         struct shared_node *dest;
1382         int ret;
1383
1384         if (level == wc->active_node)
1385                 return 0;
1386
1387         BUG_ON(wc->active_node <= level);
1388         node = find_shared_node(&wc->shared, bytenr);
1389         if (!node) {
1390                 ret = add_shared_node(&wc->shared, bytenr, refs);
1391                 BUG_ON(ret);
1392                 node = find_shared_node(&wc->shared, bytenr);
1393                 wc->nodes[level] = node;
1394                 wc->active_node = level;
1395                 return 0;
1396         }
1397
1398         if (wc->root_level == wc->active_node &&
1399             btrfs_root_refs(&root->root_item) == 0) {
1400                 if (--node->refs == 0) {
1401                         free_inode_recs_tree(&node->root_cache);
1402                         free_inode_recs_tree(&node->inode_cache);
1403                         remove_cache_extent(&wc->shared, &node->cache);
1404                         free(node);
1405                 }
1406                 return 1;
1407         }
1408
1409         dest = wc->nodes[wc->active_node];
1410         splice_shared_node(node, dest);
1411         if (node->refs == 0) {
1412                 remove_cache_extent(&wc->shared, &node->cache);
1413                 free(node);
1414         }
1415         return 1;
1416 }
1417
1418 static int leave_shared_node(struct btrfs_root *root,
1419                              struct walk_control *wc, int level)
1420 {
1421         struct shared_node *node;
1422         struct shared_node *dest;
1423         int i;
1424
1425         if (level == wc->root_level)
1426                 return 0;
1427
1428         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1429                 if (wc->nodes[i])
1430                         break;
1431         }
1432         BUG_ON(i >= BTRFS_MAX_LEVEL);
1433
1434         node = wc->nodes[wc->active_node];
1435         wc->nodes[wc->active_node] = NULL;
1436         wc->active_node = i;
1437
1438         dest = wc->nodes[wc->active_node];
1439         if (wc->active_node < wc->root_level ||
1440             btrfs_root_refs(&root->root_item) > 0) {
1441                 BUG_ON(node->refs <= 1);
1442                 splice_shared_node(node, dest);
1443         } else {
1444                 BUG_ON(node->refs < 2);
1445                 node->refs--;
1446         }
1447         return 0;
1448 }
1449
1450 /*
1451  * Returns:
1452  * < 0 - on error
1453  * 1   - if the root with id child_root_id is a child of root parent_root_id
1454  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1455  *       has other root(s) as parent(s)
1456  * 2   - if the root child_root_id doesn't have any parent roots
1457  */
1458 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1459                          u64 child_root_id)
1460 {
1461         struct btrfs_path path;
1462         struct btrfs_key key;
1463         struct extent_buffer *leaf;
1464         int has_parent = 0;
1465         int ret;
1466
1467         btrfs_init_path(&path);
1468
1469         key.objectid = parent_root_id;
1470         key.type = BTRFS_ROOT_REF_KEY;
1471         key.offset = child_root_id;
1472         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1473                                 0, 0);
1474         if (ret < 0)
1475                 return ret;
1476         btrfs_release_path(&path);
1477         if (!ret)
1478                 return 1;
1479
1480         key.objectid = child_root_id;
1481         key.type = BTRFS_ROOT_BACKREF_KEY;
1482         key.offset = 0;
1483         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1484                                 0, 0);
1485         if (ret < 0)
1486                 goto out;
1487
1488         while (1) {
1489                 leaf = path.nodes[0];
1490                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1491                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1492                         if (ret)
1493                                 break;
1494                         leaf = path.nodes[0];
1495                 }
1496
1497                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1498                 if (key.objectid != child_root_id ||
1499                     key.type != BTRFS_ROOT_BACKREF_KEY)
1500                         break;
1501
1502                 has_parent = 1;
1503
1504                 if (key.offset == parent_root_id) {
1505                         btrfs_release_path(&path);
1506                         return 1;
1507                 }
1508
1509                 path.slots[0]++;
1510         }
1511 out:
1512         btrfs_release_path(&path);
1513         if (ret < 0)
1514                 return ret;
1515         return has_parent ? 0 : 2;
1516 }
1517
1518 static int process_dir_item(struct btrfs_root *root,
1519                             struct extent_buffer *eb,
1520                             int slot, struct btrfs_key *key,
1521                             struct shared_node *active_node)
1522 {
1523         u32 total;
1524         u32 cur = 0;
1525         u32 len;
1526         u32 name_len;
1527         u32 data_len;
1528         int error;
1529         int nritems = 0;
1530         int filetype;
1531         struct btrfs_dir_item *di;
1532         struct inode_record *rec;
1533         struct cache_tree *root_cache;
1534         struct cache_tree *inode_cache;
1535         struct btrfs_key location;
1536         char namebuf[BTRFS_NAME_LEN];
1537
1538         root_cache = &active_node->root_cache;
1539         inode_cache = &active_node->inode_cache;
1540         rec = active_node->current;
1541         rec->found_dir_item = 1;
1542
1543         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1544         total = btrfs_item_size_nr(eb, slot);
1545         while (cur < total) {
1546                 nritems++;
1547                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1548                 name_len = btrfs_dir_name_len(eb, di);
1549                 data_len = btrfs_dir_data_len(eb, di);
1550                 filetype = btrfs_dir_type(eb, di);
1551
1552                 rec->found_size += name_len;
1553                 if (name_len <= BTRFS_NAME_LEN) {
1554                         len = name_len;
1555                         error = 0;
1556                 } else {
1557                         len = BTRFS_NAME_LEN;
1558                         error = REF_ERR_NAME_TOO_LONG;
1559                 }
1560                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1561
1562                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1563                         add_inode_backref(inode_cache, location.objectid,
1564                                           key->objectid, key->offset, namebuf,
1565                                           len, filetype, key->type, error);
1566                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1567                         add_inode_backref(root_cache, location.objectid,
1568                                           key->objectid, key->offset,
1569                                           namebuf, len, filetype,
1570                                           key->type, error);
1571                 } else {
1572                         fprintf(stderr, "invalid location in dir item %u\n",
1573                                 location.type);
1574                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1575                                           key->objectid, key->offset, namebuf,
1576                                           len, filetype, key->type, error);
1577                 }
1578
1579                 len = sizeof(*di) + name_len + data_len;
1580                 di = (struct btrfs_dir_item *)((char *)di + len);
1581                 cur += len;
1582         }
1583         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1584                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1585
1586         return 0;
1587 }
1588
1589 static int process_inode_ref(struct extent_buffer *eb,
1590                              int slot, struct btrfs_key *key,
1591                              struct shared_node *active_node)
1592 {
1593         u32 total;
1594         u32 cur = 0;
1595         u32 len;
1596         u32 name_len;
1597         u64 index;
1598         int error;
1599         struct cache_tree *inode_cache;
1600         struct btrfs_inode_ref *ref;
1601         char namebuf[BTRFS_NAME_LEN];
1602
1603         inode_cache = &active_node->inode_cache;
1604
1605         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1606         total = btrfs_item_size_nr(eb, slot);
1607         while (cur < total) {
1608                 name_len = btrfs_inode_ref_name_len(eb, ref);
1609                 index = btrfs_inode_ref_index(eb, ref);
1610                 if (name_len <= BTRFS_NAME_LEN) {
1611                         len = name_len;
1612                         error = 0;
1613                 } else {
1614                         len = BTRFS_NAME_LEN;
1615                         error = REF_ERR_NAME_TOO_LONG;
1616                 }
1617                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1618                 add_inode_backref(inode_cache, key->objectid, key->offset,
1619                                   index, namebuf, len, 0, key->type, error);
1620
1621                 len = sizeof(*ref) + name_len;
1622                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1623                 cur += len;
1624         }
1625         return 0;
1626 }
1627
1628 static int process_inode_extref(struct extent_buffer *eb,
1629                                 int slot, struct btrfs_key *key,
1630                                 struct shared_node *active_node)
1631 {
1632         u32 total;
1633         u32 cur = 0;
1634         u32 len;
1635         u32 name_len;
1636         u64 index;
1637         u64 parent;
1638         int error;
1639         struct cache_tree *inode_cache;
1640         struct btrfs_inode_extref *extref;
1641         char namebuf[BTRFS_NAME_LEN];
1642
1643         inode_cache = &active_node->inode_cache;
1644
1645         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1646         total = btrfs_item_size_nr(eb, slot);
1647         while (cur < total) {
1648                 name_len = btrfs_inode_extref_name_len(eb, extref);
1649                 index = btrfs_inode_extref_index(eb, extref);
1650                 parent = btrfs_inode_extref_parent(eb, extref);
1651                 if (name_len <= BTRFS_NAME_LEN) {
1652                         len = name_len;
1653                         error = 0;
1654                 } else {
1655                         len = BTRFS_NAME_LEN;
1656                         error = REF_ERR_NAME_TOO_LONG;
1657                 }
1658                 read_extent_buffer(eb, namebuf,
1659                                    (unsigned long)(extref + 1), len);
1660                 add_inode_backref(inode_cache, key->objectid, parent,
1661                                   index, namebuf, len, 0, key->type, error);
1662
1663                 len = sizeof(*extref) + name_len;
1664                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1665                 cur += len;
1666         }
1667         return 0;
1668
1669 }
1670
1671 static int count_csum_range(struct btrfs_root *root, u64 start,
1672                             u64 len, u64 *found)
1673 {
1674         struct btrfs_key key;
1675         struct btrfs_path path;
1676         struct extent_buffer *leaf;
1677         int ret;
1678         size_t size;
1679         *found = 0;
1680         u64 csum_end;
1681         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1682
1683         btrfs_init_path(&path);
1684
1685         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1686         key.offset = start;
1687         key.type = BTRFS_EXTENT_CSUM_KEY;
1688
1689         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1690                                 &key, &path, 0, 0);
1691         if (ret < 0)
1692                 goto out;
1693         if (ret > 0 && path.slots[0] > 0) {
1694                 leaf = path.nodes[0];
1695                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1696                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1697                     key.type == BTRFS_EXTENT_CSUM_KEY)
1698                         path.slots[0]--;
1699         }
1700
1701         while (len > 0) {
1702                 leaf = path.nodes[0];
1703                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1704                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1705                         if (ret > 0)
1706                                 break;
1707                         else if (ret < 0)
1708                                 goto out;
1709                         leaf = path.nodes[0];
1710                 }
1711
1712                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1713                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1714                     key.type != BTRFS_EXTENT_CSUM_KEY)
1715                         break;
1716
1717                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1718                 if (key.offset >= start + len)
1719                         break;
1720
1721                 if (key.offset > start)
1722                         start = key.offset;
1723
1724                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1725                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1726                 if (csum_end > start) {
1727                         size = min(csum_end - start, len);
1728                         len -= size;
1729                         start += size;
1730                         *found += size;
1731                 }
1732
1733                 path.slots[0]++;
1734         }
1735 out:
1736         btrfs_release_path(&path);
1737         if (ret < 0)
1738                 return ret;
1739         return 0;
1740 }
1741
1742 static int process_file_extent(struct btrfs_root *root,
1743                                 struct extent_buffer *eb,
1744                                 int slot, struct btrfs_key *key,
1745                                 struct shared_node *active_node)
1746 {
1747         struct inode_record *rec;
1748         struct btrfs_file_extent_item *fi;
1749         u64 num_bytes = 0;
1750         u64 disk_bytenr = 0;
1751         u64 extent_offset = 0;
1752         u64 mask = root->sectorsize - 1;
1753         int extent_type;
1754         int ret;
1755
1756         rec = active_node->current;
1757         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1758         rec->found_file_extent = 1;
1759
1760         if (rec->extent_start == (u64)-1) {
1761                 rec->extent_start = key->offset;
1762                 rec->extent_end = key->offset;
1763         }
1764
1765         if (rec->extent_end > key->offset)
1766                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1767         else if (rec->extent_end < key->offset) {
1768                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1769                                            key->offset - rec->extent_end);
1770                 if (ret < 0)
1771                         return ret;
1772         }
1773
1774         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1775         extent_type = btrfs_file_extent_type(eb, fi);
1776
1777         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1778                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1779                 if (num_bytes == 0)
1780                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1781                 rec->found_size += num_bytes;
1782                 num_bytes = (num_bytes + mask) & ~mask;
1783         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1784                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1785                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1786                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1787                 extent_offset = btrfs_file_extent_offset(eb, fi);
1788                 if (num_bytes == 0 || (num_bytes & mask))
1789                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1790                 if (num_bytes + extent_offset >
1791                     btrfs_file_extent_ram_bytes(eb, fi))
1792                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1793                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1794                     (btrfs_file_extent_compression(eb, fi) ||
1795                      btrfs_file_extent_encryption(eb, fi) ||
1796                      btrfs_file_extent_other_encoding(eb, fi)))
1797                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1798                 if (disk_bytenr > 0)
1799                         rec->found_size += num_bytes;
1800         } else {
1801                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1802         }
1803         rec->extent_end = key->offset + num_bytes;
1804
1805         /*
1806          * The data reloc tree will copy full extents into its inode and then
1807          * copy the corresponding csums.  Because the extent it copied could be
1808          * a preallocated extent that hasn't been written to yet there may be no
1809          * csums to copy, ergo we won't have csums for our file extent.  This is
1810          * ok so just don't bother checking csums if the inode belongs to the
1811          * data reloc tree.
1812          */
1813         if (disk_bytenr > 0 &&
1814             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1815                 u64 found;
1816                 if (btrfs_file_extent_compression(eb, fi))
1817                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1818                 else
1819                         disk_bytenr += extent_offset;
1820
1821                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1822                 if (ret < 0)
1823                         return ret;
1824                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1825                         if (found > 0)
1826                                 rec->found_csum_item = 1;
1827                         if (found < num_bytes)
1828                                 rec->some_csum_missing = 1;
1829                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1830                         if (found > 0)
1831                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1832                 }
1833         }
1834         return 0;
1835 }
1836
1837 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1838                             struct walk_control *wc)
1839 {
1840         struct btrfs_key key;
1841         u32 nritems;
1842         int i;
1843         int ret = 0;
1844         struct cache_tree *inode_cache;
1845         struct shared_node *active_node;
1846
1847         if (wc->root_level == wc->active_node &&
1848             btrfs_root_refs(&root->root_item) == 0)
1849                 return 0;
1850
1851         active_node = wc->nodes[wc->active_node];
1852         inode_cache = &active_node->inode_cache;
1853         nritems = btrfs_header_nritems(eb);
1854         for (i = 0; i < nritems; i++) {
1855                 btrfs_item_key_to_cpu(eb, &key, i);
1856
1857                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1858                         continue;
1859                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1860                         continue;
1861
1862                 if (active_node->current == NULL ||
1863                     active_node->current->ino < key.objectid) {
1864                         if (active_node->current) {
1865                                 active_node->current->checked = 1;
1866                                 maybe_free_inode_rec(inode_cache,
1867                                                      active_node->current);
1868                         }
1869                         active_node->current = get_inode_rec(inode_cache,
1870                                                              key.objectid, 1);
1871                         BUG_ON(IS_ERR(active_node->current));
1872                 }
1873                 switch (key.type) {
1874                 case BTRFS_DIR_ITEM_KEY:
1875                 case BTRFS_DIR_INDEX_KEY:
1876                         ret = process_dir_item(root, eb, i, &key, active_node);
1877                         break;
1878                 case BTRFS_INODE_REF_KEY:
1879                         ret = process_inode_ref(eb, i, &key, active_node);
1880                         break;
1881                 case BTRFS_INODE_EXTREF_KEY:
1882                         ret = process_inode_extref(eb, i, &key, active_node);
1883                         break;
1884                 case BTRFS_INODE_ITEM_KEY:
1885                         ret = process_inode_item(eb, i, &key, active_node);
1886                         break;
1887                 case BTRFS_EXTENT_DATA_KEY:
1888                         ret = process_file_extent(root, eb, i, &key,
1889                                                   active_node);
1890                         break;
1891                 default:
1892                         break;
1893                 };
1894         }
1895         return ret;
1896 }
1897
1898 static void reada_walk_down(struct btrfs_root *root,
1899                             struct extent_buffer *node, int slot)
1900 {
1901         u64 bytenr;
1902         u64 ptr_gen;
1903         u32 nritems;
1904         u32 blocksize;
1905         int i;
1906         int level;
1907
1908         level = btrfs_header_level(node);
1909         if (level != 1)
1910                 return;
1911
1912         nritems = btrfs_header_nritems(node);
1913         blocksize = root->nodesize;
1914         for (i = slot; i < nritems; i++) {
1915                 bytenr = btrfs_node_blockptr(node, i);
1916                 ptr_gen = btrfs_node_ptr_generation(node, i);
1917                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1918         }
1919 }
1920
1921 /*
1922  * Check the child node/leaf by the following condition:
1923  * 1. the first item key of the node/leaf should be the same with the one
1924  *    in parent.
1925  * 2. block in parent node should match the child node/leaf.
1926  * 3. generation of parent node and child's header should be consistent.
1927  *
1928  * Or the child node/leaf pointed by the key in parent is not valid.
1929  *
1930  * We hope to check leaf owner too, but since subvol may share leaves,
1931  * which makes leaf owner check not so strong, key check should be
1932  * sufficient enough for that case.
1933  */
1934 static int check_child_node(struct btrfs_root *root,
1935                             struct extent_buffer *parent, int slot,
1936                             struct extent_buffer *child)
1937 {
1938         struct btrfs_key parent_key;
1939         struct btrfs_key child_key;
1940         int ret = 0;
1941
1942         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1943         if (btrfs_header_level(child) == 0)
1944                 btrfs_item_key_to_cpu(child, &child_key, 0);
1945         else
1946                 btrfs_node_key_to_cpu(child, &child_key, 0);
1947
1948         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1949                 ret = -EINVAL;
1950                 fprintf(stderr,
1951                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1952                         parent_key.objectid, parent_key.type, parent_key.offset,
1953                         child_key.objectid, child_key.type, child_key.offset);
1954         }
1955         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1956                 ret = -EINVAL;
1957                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1958                         btrfs_node_blockptr(parent, slot),
1959                         btrfs_header_bytenr(child));
1960         }
1961         if (btrfs_node_ptr_generation(parent, slot) !=
1962             btrfs_header_generation(child)) {
1963                 ret = -EINVAL;
1964                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1965                         btrfs_header_generation(child),
1966                         btrfs_node_ptr_generation(parent, slot));
1967         }
1968         return ret;
1969 }
1970
1971 struct node_refs {
1972         u64 bytenr[BTRFS_MAX_LEVEL];
1973         u64 refs[BTRFS_MAX_LEVEL];
1974 };
1975
1976 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1977                           struct walk_control *wc, int *level,
1978                           struct node_refs *nrefs)
1979 {
1980         enum btrfs_tree_block_status status;
1981         u64 bytenr;
1982         u64 ptr_gen;
1983         struct extent_buffer *next;
1984         struct extent_buffer *cur;
1985         u32 blocksize;
1986         int ret, err = 0;
1987         u64 refs;
1988
1989         WARN_ON(*level < 0);
1990         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1991
1992         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1993                 refs = nrefs->refs[*level];
1994                 ret = 0;
1995         } else {
1996                 ret = btrfs_lookup_extent_info(NULL, root,
1997                                        path->nodes[*level]->start,
1998                                        *level, 1, &refs, NULL);
1999                 if (ret < 0) {
2000                         err = ret;
2001                         goto out;
2002                 }
2003                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2004                 nrefs->refs[*level] = refs;
2005         }
2006
2007         if (refs > 1) {
2008                 ret = enter_shared_node(root, path->nodes[*level]->start,
2009                                         refs, wc, *level);
2010                 if (ret > 0) {
2011                         err = ret;
2012                         goto out;
2013                 }
2014         }
2015
2016         while (*level >= 0) {
2017                 WARN_ON(*level < 0);
2018                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2019                 cur = path->nodes[*level];
2020
2021                 if (btrfs_header_level(cur) != *level)
2022                         WARN_ON(1);
2023
2024                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2025                         break;
2026                 if (*level == 0) {
2027                         ret = process_one_leaf(root, cur, wc);
2028                         if (ret < 0)
2029                                 err = ret;
2030                         break;
2031                 }
2032                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2033                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2034                 blocksize = root->nodesize;
2035
2036                 if (bytenr == nrefs->bytenr[*level - 1]) {
2037                         refs = nrefs->refs[*level - 1];
2038                 } else {
2039                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2040                                         *level - 1, 1, &refs, NULL);
2041                         if (ret < 0) {
2042                                 refs = 0;
2043                         } else {
2044                                 nrefs->bytenr[*level - 1] = bytenr;
2045                                 nrefs->refs[*level - 1] = refs;
2046                         }
2047                 }
2048
2049                 if (refs > 1) {
2050                         ret = enter_shared_node(root, bytenr, refs,
2051                                                 wc, *level - 1);
2052                         if (ret > 0) {
2053                                 path->slots[*level]++;
2054                                 continue;
2055                         }
2056                 }
2057
2058                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2059                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2060                         free_extent_buffer(next);
2061                         reada_walk_down(root, cur, path->slots[*level]);
2062                         next = read_tree_block(root, bytenr, blocksize,
2063                                                ptr_gen);
2064                         if (!extent_buffer_uptodate(next)) {
2065                                 struct btrfs_key node_key;
2066
2067                                 btrfs_node_key_to_cpu(path->nodes[*level],
2068                                                       &node_key,
2069                                                       path->slots[*level]);
2070                                 btrfs_add_corrupt_extent_record(root->fs_info,
2071                                                 &node_key,
2072                                                 path->nodes[*level]->start,
2073                                                 root->nodesize, *level);
2074                                 err = -EIO;
2075                                 goto out;
2076                         }
2077                 }
2078
2079                 ret = check_child_node(root, cur, path->slots[*level], next);
2080                 if (ret) {
2081                         err = ret;
2082                         goto out;
2083                 }
2084
2085                 if (btrfs_is_leaf(next))
2086                         status = btrfs_check_leaf(root, NULL, next);
2087                 else
2088                         status = btrfs_check_node(root, NULL, next);
2089                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2090                         free_extent_buffer(next);
2091                         err = -EIO;
2092                         goto out;
2093                 }
2094
2095                 *level = *level - 1;
2096                 free_extent_buffer(path->nodes[*level]);
2097                 path->nodes[*level] = next;
2098                 path->slots[*level] = 0;
2099         }
2100 out:
2101         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2102         return err;
2103 }
2104
2105 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2106                         struct walk_control *wc, int *level)
2107 {
2108         int i;
2109         struct extent_buffer *leaf;
2110
2111         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2112                 leaf = path->nodes[i];
2113                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2114                         path->slots[i]++;
2115                         *level = i;
2116                         return 0;
2117                 } else {
2118                         free_extent_buffer(path->nodes[*level]);
2119                         path->nodes[*level] = NULL;
2120                         BUG_ON(*level > wc->active_node);
2121                         if (*level == wc->active_node)
2122                                 leave_shared_node(root, wc, *level);
2123                         *level = i + 1;
2124                 }
2125         }
2126         return 1;
2127 }
2128
2129 static int check_root_dir(struct inode_record *rec)
2130 {
2131         struct inode_backref *backref;
2132         int ret = -1;
2133
2134         if (!rec->found_inode_item || rec->errors)
2135                 goto out;
2136         if (rec->nlink != 1 || rec->found_link != 0)
2137                 goto out;
2138         if (list_empty(&rec->backrefs))
2139                 goto out;
2140         backref = to_inode_backref(rec->backrefs.next);
2141         if (!backref->found_inode_ref)
2142                 goto out;
2143         if (backref->index != 0 || backref->namelen != 2 ||
2144             memcmp(backref->name, "..", 2))
2145                 goto out;
2146         if (backref->found_dir_index || backref->found_dir_item)
2147                 goto out;
2148         ret = 0;
2149 out:
2150         return ret;
2151 }
2152
2153 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2154                               struct btrfs_root *root, struct btrfs_path *path,
2155                               struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = (u64)-1;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret < 0)
2167                 goto out;
2168         if (ret) {
2169                 if (!path->slots[0]) {
2170                         ret = -ENOENT;
2171                         goto out;
2172                 }
2173                 path->slots[0]--;
2174                 ret = 0;
2175         }
2176         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2177         if (key.objectid != rec->ino) {
2178                 ret = -ENOENT;
2179                 goto out;
2180         }
2181
2182         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2183                             struct btrfs_inode_item);
2184         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2185         btrfs_mark_buffer_dirty(path->nodes[0]);
2186         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2187         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2188                root->root_key.objectid);
2189 out:
2190         btrfs_release_path(path);
2191         return ret;
2192 }
2193
2194 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2195                                     struct btrfs_root *root,
2196                                     struct btrfs_path *path,
2197                                     struct inode_record *rec)
2198 {
2199         int ret;
2200
2201         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2202         btrfs_release_path(path);
2203         if (!ret)
2204                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2205         return ret;
2206 }
2207
2208 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2209                                struct btrfs_root *root,
2210                                struct btrfs_path *path,
2211                                struct inode_record *rec)
2212 {
2213         struct btrfs_inode_item *ei;
2214         struct btrfs_key key;
2215         int ret = 0;
2216
2217         key.objectid = rec->ino;
2218         key.type = BTRFS_INODE_ITEM_KEY;
2219         key.offset = 0;
2220
2221         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2222         if (ret) {
2223                 if (ret > 0)
2224                         ret = -ENOENT;
2225                 goto out;
2226         }
2227
2228         /* Since ret == 0, no need to check anything */
2229         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2230                             struct btrfs_inode_item);
2231         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2232         btrfs_mark_buffer_dirty(path->nodes[0]);
2233         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2234         printf("reset nbytes for ino %llu root %llu\n",
2235                rec->ino, root->root_key.objectid);
2236 out:
2237         btrfs_release_path(path);
2238         return ret;
2239 }
2240
2241 static int add_missing_dir_index(struct btrfs_root *root,
2242                                  struct cache_tree *inode_cache,
2243                                  struct inode_record *rec,
2244                                  struct inode_backref *backref)
2245 {
2246         struct btrfs_path *path;
2247         struct btrfs_trans_handle *trans;
2248         struct btrfs_dir_item *dir_item;
2249         struct extent_buffer *leaf;
2250         struct btrfs_key key;
2251         struct btrfs_disk_key disk_key;
2252         struct inode_record *dir_rec;
2253         unsigned long name_ptr;
2254         u32 data_size = sizeof(*dir_item) + backref->namelen;
2255         int ret;
2256
2257         path = btrfs_alloc_path();
2258         if (!path)
2259                 return -ENOMEM;
2260
2261         trans = btrfs_start_transaction(root, 1);
2262         if (IS_ERR(trans)) {
2263                 btrfs_free_path(path);
2264                 return PTR_ERR(trans);
2265         }
2266
2267         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2268                 (unsigned long long)rec->ino);
2269         key.objectid = backref->dir;
2270         key.type = BTRFS_DIR_INDEX_KEY;
2271         key.offset = backref->index;
2272
2273         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2274         BUG_ON(ret);
2275
2276         leaf = path->nodes[0];
2277         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2278
2279         disk_key.objectid = cpu_to_le64(rec->ino);
2280         disk_key.type = BTRFS_INODE_ITEM_KEY;
2281         disk_key.offset = 0;
2282
2283         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2284         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2285         btrfs_set_dir_data_len(leaf, dir_item, 0);
2286         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2287         name_ptr = (unsigned long)(dir_item + 1);
2288         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2289         btrfs_mark_buffer_dirty(leaf);
2290         btrfs_free_path(path);
2291         btrfs_commit_transaction(trans, root);
2292
2293         backref->found_dir_index = 1;
2294         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2295         BUG_ON(IS_ERR(dir_rec));
2296         if (!dir_rec)
2297                 return 0;
2298         dir_rec->found_size += backref->namelen;
2299         if (dir_rec->found_size == dir_rec->isize &&
2300             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2301                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2302         if (dir_rec->found_size != dir_rec->isize)
2303                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2304
2305         return 0;
2306 }
2307
2308 static int delete_dir_index(struct btrfs_root *root,
2309                             struct cache_tree *inode_cache,
2310                             struct inode_record *rec,
2311                             struct inode_backref *backref)
2312 {
2313         struct btrfs_trans_handle *trans;
2314         struct btrfs_dir_item *di;
2315         struct btrfs_path *path;
2316         int ret = 0;
2317
2318         path = btrfs_alloc_path();
2319         if (!path)
2320                 return -ENOMEM;
2321
2322         trans = btrfs_start_transaction(root, 1);
2323         if (IS_ERR(trans)) {
2324                 btrfs_free_path(path);
2325                 return PTR_ERR(trans);
2326         }
2327
2328
2329         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2330                 (unsigned long long)backref->dir,
2331                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2332                 (unsigned long long)root->objectid);
2333
2334         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2335                                     backref->name, backref->namelen,
2336                                     backref->index, -1);
2337         if (IS_ERR(di)) {
2338                 ret = PTR_ERR(di);
2339                 btrfs_free_path(path);
2340                 btrfs_commit_transaction(trans, root);
2341                 if (ret == -ENOENT)
2342                         return 0;
2343                 return ret;
2344         }
2345
2346         if (!di)
2347                 ret = btrfs_del_item(trans, root, path);
2348         else
2349                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2350         BUG_ON(ret);
2351         btrfs_free_path(path);
2352         btrfs_commit_transaction(trans, root);
2353         return ret;
2354 }
2355
2356 static int create_inode_item(struct btrfs_root *root,
2357                              struct inode_record *rec,
2358                              struct inode_backref *backref, int root_dir)
2359 {
2360         struct btrfs_trans_handle *trans;
2361         struct btrfs_inode_item inode_item;
2362         time_t now = time(NULL);
2363         int ret;
2364
2365         trans = btrfs_start_transaction(root, 1);
2366         if (IS_ERR(trans)) {
2367                 ret = PTR_ERR(trans);
2368                 return ret;
2369         }
2370
2371         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2372                 "be incomplete, please check permissions and content after "
2373                 "the fsck completes.\n", (unsigned long long)root->objectid,
2374                 (unsigned long long)rec->ino);
2375
2376         memset(&inode_item, 0, sizeof(inode_item));
2377         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2378         if (root_dir)
2379                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2380         else
2381                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2382         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2383         if (rec->found_dir_item) {
2384                 if (rec->found_file_extent)
2385                         fprintf(stderr, "root %llu inode %llu has both a dir "
2386                                 "item and extents, unsure if it is a dir or a "
2387                                 "regular file so setting it as a directory\n",
2388                                 (unsigned long long)root->objectid,
2389                                 (unsigned long long)rec->ino);
2390                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2391                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2392         } else if (!rec->found_dir_item) {
2393                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2394                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2395         }
2396         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2397         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2398         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2399         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2400         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2401         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2402         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2403         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2404
2405         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2406         BUG_ON(ret);
2407         btrfs_commit_transaction(trans, root);
2408         return 0;
2409 }
2410
2411 static int repair_inode_backrefs(struct btrfs_root *root,
2412                                  struct inode_record *rec,
2413                                  struct cache_tree *inode_cache,
2414                                  int delete)
2415 {
2416         struct inode_backref *tmp, *backref;
2417         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2418         int ret = 0;
2419         int repaired = 0;
2420
2421         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2422                 if (!delete && rec->ino == root_dirid) {
2423                         if (!rec->found_inode_item) {
2424                                 ret = create_inode_item(root, rec, backref, 1);
2425                                 if (ret)
2426                                         break;
2427                                 repaired++;
2428                         }
2429                 }
2430
2431                 /* Index 0 for root dir's are special, don't mess with it */
2432                 if (rec->ino == root_dirid && backref->index == 0)
2433                         continue;
2434
2435                 if (delete &&
2436                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2437                      (backref->found_dir_index && backref->found_inode_ref &&
2438                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2439                         ret = delete_dir_index(root, inode_cache, rec, backref);
2440                         if (ret)
2441                                 break;
2442                         repaired++;
2443                         list_del(&backref->list);
2444                         free(backref);
2445                 }
2446
2447                 if (!delete && !backref->found_dir_index &&
2448                     backref->found_dir_item && backref->found_inode_ref) {
2449                         ret = add_missing_dir_index(root, inode_cache, rec,
2450                                                     backref);
2451                         if (ret)
2452                                 break;
2453                         repaired++;
2454                         if (backref->found_dir_item &&
2455                             backref->found_dir_index &&
2456                             backref->found_dir_index) {
2457                                 if (!backref->errors &&
2458                                     backref->found_inode_ref) {
2459                                         list_del(&backref->list);
2460                                         free(backref);
2461                                 }
2462                         }
2463                 }
2464
2465                 if (!delete && (!backref->found_dir_index &&
2466                                 !backref->found_dir_item &&
2467                                 backref->found_inode_ref)) {
2468                         struct btrfs_trans_handle *trans;
2469                         struct btrfs_key location;
2470
2471                         ret = check_dir_conflict(root, backref->name,
2472                                                  backref->namelen,
2473                                                  backref->dir,
2474                                                  backref->index);
2475                         if (ret) {
2476                                 /*
2477                                  * let nlink fixing routine to handle it,
2478                                  * which can do it better.
2479                                  */
2480                                 ret = 0;
2481                                 break;
2482                         }
2483                         location.objectid = rec->ino;
2484                         location.type = BTRFS_INODE_ITEM_KEY;
2485                         location.offset = 0;
2486
2487                         trans = btrfs_start_transaction(root, 1);
2488                         if (IS_ERR(trans)) {
2489                                 ret = PTR_ERR(trans);
2490                                 break;
2491                         }
2492                         fprintf(stderr, "adding missing dir index/item pair "
2493                                 "for inode %llu\n",
2494                                 (unsigned long long)rec->ino);
2495                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2496                                                     backref->namelen,
2497                                                     backref->dir, &location,
2498                                                     imode_to_type(rec->imode),
2499                                                     backref->index);
2500                         BUG_ON(ret);
2501                         btrfs_commit_transaction(trans, root);
2502                         repaired++;
2503                 }
2504
2505                 if (!delete && (backref->found_inode_ref &&
2506                                 backref->found_dir_index &&
2507                                 backref->found_dir_item &&
2508                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2509                                 !rec->found_inode_item)) {
2510                         ret = create_inode_item(root, rec, backref, 0);
2511                         if (ret)
2512                                 break;
2513                         repaired++;
2514                 }
2515
2516         }
2517         return ret ? ret : repaired;
2518 }
2519
2520 /*
2521  * To determine the file type for nlink/inode_item repair
2522  *
2523  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2524  * Return -ENOENT if file type is not found.
2525  */
2526 static int find_file_type(struct inode_record *rec, u8 *type)
2527 {
2528         struct inode_backref *backref;
2529
2530         /* For inode item recovered case */
2531         if (rec->found_inode_item) {
2532                 *type = imode_to_type(rec->imode);
2533                 return 0;
2534         }
2535
2536         list_for_each_entry(backref, &rec->backrefs, list) {
2537                 if (backref->found_dir_index || backref->found_dir_item) {
2538                         *type = backref->filetype;
2539                         return 0;
2540                 }
2541         }
2542         return -ENOENT;
2543 }
2544
2545 /*
2546  * To determine the file name for nlink repair
2547  *
2548  * Return 0 if file name is found, set name and namelen.
2549  * Return -ENOENT if file name is not found.
2550  */
2551 static int find_file_name(struct inode_record *rec,
2552                           char *name, int *namelen)
2553 {
2554         struct inode_backref *backref;
2555
2556         list_for_each_entry(backref, &rec->backrefs, list) {
2557                 if (backref->found_dir_index || backref->found_dir_item ||
2558                     backref->found_inode_ref) {
2559                         memcpy(name, backref->name, backref->namelen);
2560                         *namelen = backref->namelen;
2561                         return 0;
2562                 }
2563         }
2564         return -ENOENT;
2565 }
2566
2567 /* Reset the nlink of the inode to the correct one */
2568 static int reset_nlink(struct btrfs_trans_handle *trans,
2569                        struct btrfs_root *root,
2570                        struct btrfs_path *path,
2571                        struct inode_record *rec)
2572 {
2573         struct inode_backref *backref;
2574         struct inode_backref *tmp;
2575         struct btrfs_key key;
2576         struct btrfs_inode_item *inode_item;
2577         int ret = 0;
2578
2579         /* We don't believe this either, reset it and iterate backref */
2580         rec->found_link = 0;
2581
2582         /* Remove all backref including the valid ones */
2583         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2584                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2585                                    backref->index, backref->name,
2586                                    backref->namelen, 0);
2587                 if (ret < 0)
2588                         goto out;
2589
2590                 /* remove invalid backref, so it won't be added back */
2591                 if (!(backref->found_dir_index &&
2592                       backref->found_dir_item &&
2593                       backref->found_inode_ref)) {
2594                         list_del(&backref->list);
2595                         free(backref);
2596                 } else {
2597                         rec->found_link++;
2598                 }
2599         }
2600
2601         /* Set nlink to 0 */
2602         key.objectid = rec->ino;
2603         key.type = BTRFS_INODE_ITEM_KEY;
2604         key.offset = 0;
2605         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2606         if (ret < 0)
2607                 goto out;
2608         if (ret > 0) {
2609                 ret = -ENOENT;
2610                 goto out;
2611         }
2612         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2613                                     struct btrfs_inode_item);
2614         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2615         btrfs_mark_buffer_dirty(path->nodes[0]);
2616         btrfs_release_path(path);
2617
2618         /*
2619          * Add back valid inode_ref/dir_item/dir_index,
2620          * add_link() will handle the nlink inc, so new nlink must be correct
2621          */
2622         list_for_each_entry(backref, &rec->backrefs, list) {
2623                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2624                                      backref->name, backref->namelen,
2625                                      backref->filetype, &backref->index, 1);
2626                 if (ret < 0)
2627                         goto out;
2628         }
2629 out:
2630         btrfs_release_path(path);
2631         return ret;
2632 }
2633
2634 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2635                                struct btrfs_root *root,
2636                                struct btrfs_path *path,
2637                                struct inode_record *rec)
2638 {
2639         char *dir_name = "lost+found";
2640         char namebuf[BTRFS_NAME_LEN] = {0};
2641         u64 lost_found_ino;
2642         u32 mode = 0700;
2643         u8 type = 0;
2644         int namelen = 0;
2645         int name_recovered = 0;
2646         int type_recovered = 0;
2647         int ret = 0;
2648
2649         /*
2650          * Get file name and type first before these invalid inode ref
2651          * are deleted by remove_all_invalid_backref()
2652          */
2653         name_recovered = !find_file_name(rec, namebuf, &namelen);
2654         type_recovered = !find_file_type(rec, &type);
2655
2656         if (!name_recovered) {
2657                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2658                        rec->ino, rec->ino);
2659                 namelen = count_digits(rec->ino);
2660                 sprintf(namebuf, "%llu", rec->ino);
2661                 name_recovered = 1;
2662         }
2663         if (!type_recovered) {
2664                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2665                        rec->ino);
2666                 type = BTRFS_FT_REG_FILE;
2667                 type_recovered = 1;
2668         }
2669
2670         ret = reset_nlink(trans, root, path, rec);
2671         if (ret < 0) {
2672                 fprintf(stderr,
2673                         "Failed to reset nlink for inode %llu: %s\n",
2674                         rec->ino, strerror(-ret));
2675                 goto out;
2676         }
2677
2678         if (rec->found_link == 0) {
2679                 lost_found_ino = root->highest_inode;
2680                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2681                         ret = -EOVERFLOW;
2682                         goto out;
2683                 }
2684                 lost_found_ino++;
2685                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2686                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2687                                   mode);
2688                 if (ret < 0) {
2689                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2690                                 dir_name, strerror(-ret));
2691                         goto out;
2692                 }
2693                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2694                                      namebuf, namelen, type, NULL, 1);
2695                 /*
2696                  * Add ".INO" suffix several times to handle case where
2697                  * "FILENAME.INO" is already taken by another file.
2698                  */
2699                 while (ret == -EEXIST) {
2700                         /*
2701                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2702                          */
2703                         if (namelen + count_digits(rec->ino) + 1 >
2704                             BTRFS_NAME_LEN) {
2705                                 ret = -EFBIG;
2706                                 goto out;
2707                         }
2708                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2709                                  ".%llu", rec->ino);
2710                         namelen += count_digits(rec->ino) + 1;
2711                         ret = btrfs_add_link(trans, root, rec->ino,
2712                                              lost_found_ino, namebuf,
2713                                              namelen, type, NULL, 1);
2714                 }
2715                 if (ret < 0) {
2716                         fprintf(stderr,
2717                                 "Failed to link the inode %llu to %s dir: %s\n",
2718                                 rec->ino, dir_name, strerror(-ret));
2719                         goto out;
2720                 }
2721                 /*
2722                  * Just increase the found_link, don't actually add the
2723                  * backref. This will make things easier and this inode
2724                  * record will be freed after the repair is done.
2725                  * So fsck will not report problem about this inode.
2726                  */
2727                 rec->found_link++;
2728                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2729                        namelen, namebuf, dir_name);
2730         }
2731         printf("Fixed the nlink of inode %llu\n", rec->ino);
2732 out:
2733         /*
2734          * Clear the flag anyway, or we will loop forever for the same inode
2735          * as it will not be removed from the bad inode list and the dead loop
2736          * happens.
2737          */
2738         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2739         btrfs_release_path(path);
2740         return ret;
2741 }
2742
2743 /*
2744  * Check if there is any normal(reg or prealloc) file extent for given
2745  * ino.
2746  * This is used to determine the file type when neither its dir_index/item or
2747  * inode_item exists.
2748  *
2749  * This will *NOT* report error, if any error happens, just consider it does
2750  * not have any normal file extent.
2751  */
2752 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2753 {
2754         struct btrfs_path *path;
2755         struct btrfs_key key;
2756         struct btrfs_key found_key;
2757         struct btrfs_file_extent_item *fi;
2758         u8 type;
2759         int ret = 0;
2760
2761         path = btrfs_alloc_path();
2762         if (!path)
2763                 goto out;
2764         key.objectid = ino;
2765         key.type = BTRFS_EXTENT_DATA_KEY;
2766         key.offset = 0;
2767
2768         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2769         if (ret < 0) {
2770                 ret = 0;
2771                 goto out;
2772         }
2773         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2774                 ret = btrfs_next_leaf(root, path);
2775                 if (ret) {
2776                         ret = 0;
2777                         goto out;
2778                 }
2779         }
2780         while (1) {
2781                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2782                                       path->slots[0]);
2783                 if (found_key.objectid != ino ||
2784                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2785                         break;
2786                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2787                                     struct btrfs_file_extent_item);
2788                 type = btrfs_file_extent_type(path->nodes[0], fi);
2789                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2790                         ret = 1;
2791                         goto out;
2792                 }
2793         }
2794 out:
2795         btrfs_free_path(path);
2796         return ret;
2797 }
2798
2799 static u32 btrfs_type_to_imode(u8 type)
2800 {
2801         static u32 imode_by_btrfs_type[] = {
2802                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2803                 [BTRFS_FT_DIR]          = S_IFDIR,
2804                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2805                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2806                 [BTRFS_FT_FIFO]         = S_IFIFO,
2807                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2808                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2809         };
2810
2811         return imode_by_btrfs_type[(type)];
2812 }
2813
2814 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2815                                 struct btrfs_root *root,
2816                                 struct btrfs_path *path,
2817                                 struct inode_record *rec)
2818 {
2819         u8 filetype;
2820         u32 mode = 0700;
2821         int type_recovered = 0;
2822         int ret = 0;
2823
2824         printf("Trying to rebuild inode:%llu\n", rec->ino);
2825
2826         type_recovered = !find_file_type(rec, &filetype);
2827
2828         /*
2829          * Try to determine inode type if type not found.
2830          *
2831          * For found regular file extent, it must be FILE.
2832          * For found dir_item/index, it must be DIR.
2833          *
2834          * For undetermined one, use FILE as fallback.
2835          *
2836          * TODO:
2837          * 1. If found backref(inode_index/item is already handled) to it,
2838          *    it must be DIR.
2839          *    Need new inode-inode ref structure to allow search for that.
2840          */
2841         if (!type_recovered) {
2842                 if (rec->found_file_extent &&
2843                     find_normal_file_extent(root, rec->ino)) {
2844                         type_recovered = 1;
2845                         filetype = BTRFS_FT_REG_FILE;
2846                 } else if (rec->found_dir_item) {
2847                         type_recovered = 1;
2848                         filetype = BTRFS_FT_DIR;
2849                 } else if (!list_empty(&rec->orphan_extents)) {
2850                         type_recovered = 1;
2851                         filetype = BTRFS_FT_REG_FILE;
2852                 } else{
2853                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2854                                rec->ino);
2855                         type_recovered = 1;
2856                         filetype = BTRFS_FT_REG_FILE;
2857                 }
2858         }
2859
2860         ret = btrfs_new_inode(trans, root, rec->ino,
2861                               mode | btrfs_type_to_imode(filetype));
2862         if (ret < 0)
2863                 goto out;
2864
2865         /*
2866          * Here inode rebuild is done, we only rebuild the inode item,
2867          * don't repair the nlink(like move to lost+found).
2868          * That is the job of nlink repair.
2869          *
2870          * We just fill the record and return
2871          */
2872         rec->found_dir_item = 1;
2873         rec->imode = mode | btrfs_type_to_imode(filetype);
2874         rec->nlink = 0;
2875         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2876         /* Ensure the inode_nlinks repair function will be called */
2877         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2878 out:
2879         return ret;
2880 }
2881
2882 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2883                                       struct btrfs_root *root,
2884                                       struct btrfs_path *path,
2885                                       struct inode_record *rec)
2886 {
2887         struct orphan_data_extent *orphan;
2888         struct orphan_data_extent *tmp;
2889         int ret = 0;
2890
2891         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2892                 /*
2893                  * Check for conflicting file extents
2894                  *
2895                  * Here we don't know whether the extents is compressed or not,
2896                  * so we can only assume it not compressed nor data offset,
2897                  * and use its disk_len as extent length.
2898                  */
2899                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2900                                        orphan->offset, orphan->disk_len, 0);
2901                 btrfs_release_path(path);
2902                 if (ret < 0)
2903                         goto out;
2904                 if (!ret) {
2905                         fprintf(stderr,
2906                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2907                                 orphan->disk_bytenr, orphan->disk_len);
2908                         ret = btrfs_free_extent(trans,
2909                                         root->fs_info->extent_root,
2910                                         orphan->disk_bytenr, orphan->disk_len,
2911                                         0, root->objectid, orphan->objectid,
2912                                         orphan->offset);
2913                         if (ret < 0)
2914                                 goto out;
2915                 }
2916                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2917                                 orphan->offset, orphan->disk_bytenr,
2918                                 orphan->disk_len, orphan->disk_len);
2919                 if (ret < 0)
2920                         goto out;
2921
2922                 /* Update file size info */
2923                 rec->found_size += orphan->disk_len;
2924                 if (rec->found_size == rec->nbytes)
2925                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2926
2927                 /* Update the file extent hole info too */
2928                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2929                                            orphan->disk_len);
2930                 if (ret < 0)
2931                         goto out;
2932                 if (RB_EMPTY_ROOT(&rec->holes))
2933                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2934
2935                 list_del(&orphan->list);
2936                 free(orphan);
2937         }
2938         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2939 out:
2940         return ret;
2941 }
2942
2943 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2944                                         struct btrfs_root *root,
2945                                         struct btrfs_path *path,
2946                                         struct inode_record *rec)
2947 {
2948         struct rb_node *node;
2949         struct file_extent_hole *hole;
2950         int found = 0;
2951         int ret = 0;
2952
2953         node = rb_first(&rec->holes);
2954
2955         while (node) {
2956                 found = 1;
2957                 hole = rb_entry(node, struct file_extent_hole, node);
2958                 ret = btrfs_punch_hole(trans, root, rec->ino,
2959                                        hole->start, hole->len);
2960                 if (ret < 0)
2961                         goto out;
2962                 ret = del_file_extent_hole(&rec->holes, hole->start,
2963                                            hole->len);
2964                 if (ret < 0)
2965                         goto out;
2966                 if (RB_EMPTY_ROOT(&rec->holes))
2967                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2968                 node = rb_first(&rec->holes);
2969         }
2970         /* special case for a file losing all its file extent */
2971         if (!found) {
2972                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2973                                        round_up(rec->isize, root->sectorsize));
2974                 if (ret < 0)
2975                         goto out;
2976         }
2977         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2978                rec->ino, root->objectid);
2979 out:
2980         return ret;
2981 }
2982
2983 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2984 {
2985         struct btrfs_trans_handle *trans;
2986         struct btrfs_path *path;
2987         int ret = 0;
2988
2989         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2990                              I_ERR_NO_ORPHAN_ITEM |
2991                              I_ERR_LINK_COUNT_WRONG |
2992                              I_ERR_NO_INODE_ITEM |
2993                              I_ERR_FILE_EXTENT_ORPHAN |
2994                              I_ERR_FILE_EXTENT_DISCOUNT|
2995                              I_ERR_FILE_NBYTES_WRONG)))
2996                 return rec->errors;
2997
2998         path = btrfs_alloc_path();
2999         if (!path)
3000                 return -ENOMEM;
3001
3002         /*
3003          * For nlink repair, it may create a dir and add link, so
3004          * 2 for parent(256)'s dir_index and dir_item
3005          * 2 for lost+found dir's inode_item and inode_ref
3006          * 1 for the new inode_ref of the file
3007          * 2 for lost+found dir's dir_index and dir_item for the file
3008          */
3009         trans = btrfs_start_transaction(root, 7);
3010         if (IS_ERR(trans)) {
3011                 btrfs_free_path(path);
3012                 return PTR_ERR(trans);
3013         }
3014
3015         if (rec->errors & I_ERR_NO_INODE_ITEM)
3016                 ret = repair_inode_no_item(trans, root, path, rec);
3017         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3018                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3019         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3020                 ret = repair_inode_discount_extent(trans, root, path, rec);
3021         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3022                 ret = repair_inode_isize(trans, root, path, rec);
3023         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3024                 ret = repair_inode_orphan_item(trans, root, path, rec);
3025         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3026                 ret = repair_inode_nlinks(trans, root, path, rec);
3027         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3028                 ret = repair_inode_nbytes(trans, root, path, rec);
3029         btrfs_commit_transaction(trans, root);
3030         btrfs_free_path(path);
3031         return ret;
3032 }
3033
3034 static int check_inode_recs(struct btrfs_root *root,
3035                             struct cache_tree *inode_cache)
3036 {
3037         struct cache_extent *cache;
3038         struct ptr_node *node;
3039         struct inode_record *rec;
3040         struct inode_backref *backref;
3041         int stage = 0;
3042         int ret = 0;
3043         int err = 0;
3044         u64 error = 0;
3045         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3046
3047         if (btrfs_root_refs(&root->root_item) == 0) {
3048                 if (!cache_tree_empty(inode_cache))
3049                         fprintf(stderr, "warning line %d\n", __LINE__);
3050                 return 0;
3051         }
3052
3053         /*
3054          * We need to record the highest inode number for later 'lost+found'
3055          * dir creation.
3056          * We must select an ino not used/referred by any existing inode, or
3057          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3058          * this may cause 'lost+found' dir has wrong nlinks.
3059          */
3060         cache = last_cache_extent(inode_cache);
3061         if (cache) {
3062                 node = container_of(cache, struct ptr_node, cache);
3063                 rec = node->data;
3064                 if (rec->ino > root->highest_inode)
3065                         root->highest_inode = rec->ino;
3066         }
3067
3068         /*
3069          * We need to repair backrefs first because we could change some of the
3070          * errors in the inode recs.
3071          *
3072          * We also need to go through and delete invalid backrefs first and then
3073          * add the correct ones second.  We do this because we may get EEXIST
3074          * when adding back the correct index because we hadn't yet deleted the
3075          * invalid index.
3076          *
3077          * For example, if we were missing a dir index then the directories
3078          * isize would be wrong, so if we fixed the isize to what we thought it
3079          * would be and then fixed the backref we'd still have a invalid fs, so
3080          * we need to add back the dir index and then check to see if the isize
3081          * is still wrong.
3082          */
3083         while (stage < 3) {
3084                 stage++;
3085                 if (stage == 3 && !err)
3086                         break;
3087
3088                 cache = search_cache_extent(inode_cache, 0);
3089                 while (repair && cache) {
3090                         node = container_of(cache, struct ptr_node, cache);
3091                         rec = node->data;
3092                         cache = next_cache_extent(cache);
3093
3094                         /* Need to free everything up and rescan */
3095                         if (stage == 3) {
3096                                 remove_cache_extent(inode_cache, &node->cache);
3097                                 free(node);
3098                                 free_inode_rec(rec);
3099                                 continue;
3100                         }
3101
3102                         if (list_empty(&rec->backrefs))
3103                                 continue;
3104
3105                         ret = repair_inode_backrefs(root, rec, inode_cache,
3106                                                     stage == 1);
3107                         if (ret < 0) {
3108                                 err = ret;
3109                                 stage = 2;
3110                                 break;
3111                         } if (ret > 0) {
3112                                 err = -EAGAIN;
3113                         }
3114                 }
3115         }
3116         if (err)
3117                 return err;
3118
3119         rec = get_inode_rec(inode_cache, root_dirid, 0);
3120         BUG_ON(IS_ERR(rec));
3121         if (rec) {
3122                 ret = check_root_dir(rec);
3123                 if (ret) {
3124                         fprintf(stderr, "root %llu root dir %llu error\n",
3125                                 (unsigned long long)root->root_key.objectid,
3126                                 (unsigned long long)root_dirid);
3127                         print_inode_error(root, rec);
3128                         error++;
3129                 }
3130         } else {
3131                 if (repair) {
3132                         struct btrfs_trans_handle *trans;
3133
3134                         trans = btrfs_start_transaction(root, 1);
3135                         if (IS_ERR(trans)) {
3136                                 err = PTR_ERR(trans);
3137                                 return err;
3138                         }
3139
3140                         fprintf(stderr,
3141                                 "root %llu missing its root dir, recreating\n",
3142                                 (unsigned long long)root->objectid);
3143
3144                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3145                         BUG_ON(ret);
3146
3147                         btrfs_commit_transaction(trans, root);
3148                         return -EAGAIN;
3149                 }
3150
3151                 fprintf(stderr, "root %llu root dir %llu not found\n",
3152                         (unsigned long long)root->root_key.objectid,
3153                         (unsigned long long)root_dirid);
3154         }
3155
3156         while (1) {
3157                 cache = search_cache_extent(inode_cache, 0);
3158                 if (!cache)
3159                         break;
3160                 node = container_of(cache, struct ptr_node, cache);
3161                 rec = node->data;
3162                 remove_cache_extent(inode_cache, &node->cache);
3163                 free(node);
3164                 if (rec->ino == root_dirid ||
3165                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3166                         free_inode_rec(rec);
3167                         continue;
3168                 }
3169
3170                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3171                         ret = check_orphan_item(root, rec->ino);
3172                         if (ret == 0)
3173                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3174                         if (can_free_inode_rec(rec)) {
3175                                 free_inode_rec(rec);
3176                                 continue;
3177                         }
3178                 }
3179
3180                 if (!rec->found_inode_item)
3181                         rec->errors |= I_ERR_NO_INODE_ITEM;
3182                 if (rec->found_link != rec->nlink)
3183                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3184                 if (repair) {
3185                         ret = try_repair_inode(root, rec);
3186                         if (ret == 0 && can_free_inode_rec(rec)) {
3187                                 free_inode_rec(rec);
3188                                 continue;
3189                         }
3190                         ret = 0;
3191                 }
3192
3193                 if (!(repair && ret == 0))
3194                         error++;
3195                 print_inode_error(root, rec);
3196                 list_for_each_entry(backref, &rec->backrefs, list) {
3197                         if (!backref->found_dir_item)
3198                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3199                         if (!backref->found_dir_index)
3200                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3201                         if (!backref->found_inode_ref)
3202                                 backref->errors |= REF_ERR_NO_INODE_REF;
3203                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3204                                 " namelen %u name %s filetype %d errors %x",
3205                                 (unsigned long long)backref->dir,
3206                                 (unsigned long long)backref->index,
3207                                 backref->namelen, backref->name,
3208                                 backref->filetype, backref->errors);
3209                         print_ref_error(backref->errors);
3210                 }
3211                 free_inode_rec(rec);
3212         }
3213         return (error > 0) ? -1 : 0;
3214 }
3215
3216 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3217                                         u64 objectid)
3218 {
3219         struct cache_extent *cache;
3220         struct root_record *rec = NULL;
3221         int ret;
3222
3223         cache = lookup_cache_extent(root_cache, objectid, 1);
3224         if (cache) {
3225                 rec = container_of(cache, struct root_record, cache);
3226         } else {
3227                 rec = calloc(1, sizeof(*rec));
3228                 if (!rec)
3229                         return ERR_PTR(-ENOMEM);
3230                 rec->objectid = objectid;
3231                 INIT_LIST_HEAD(&rec->backrefs);
3232                 rec->cache.start = objectid;
3233                 rec->cache.size = 1;
3234
3235                 ret = insert_cache_extent(root_cache, &rec->cache);
3236                 if (ret)
3237                         return ERR_PTR(-EEXIST);
3238         }
3239         return rec;
3240 }
3241
3242 static struct root_backref *get_root_backref(struct root_record *rec,
3243                                              u64 ref_root, u64 dir, u64 index,
3244                                              const char *name, int namelen)
3245 {
3246         struct root_backref *backref;
3247
3248         list_for_each_entry(backref, &rec->backrefs, list) {
3249                 if (backref->ref_root != ref_root || backref->dir != dir ||
3250                     backref->namelen != namelen)
3251                         continue;
3252                 if (memcmp(name, backref->name, namelen))
3253                         continue;
3254                 return backref;
3255         }
3256
3257         backref = calloc(1, sizeof(*backref) + namelen + 1);
3258         if (!backref)
3259                 return NULL;
3260         backref->ref_root = ref_root;
3261         backref->dir = dir;
3262         backref->index = index;
3263         backref->namelen = namelen;
3264         memcpy(backref->name, name, namelen);
3265         backref->name[namelen] = '\0';
3266         list_add_tail(&backref->list, &rec->backrefs);
3267         return backref;
3268 }
3269
3270 static void free_root_record(struct cache_extent *cache)
3271 {
3272         struct root_record *rec;
3273         struct root_backref *backref;
3274
3275         rec = container_of(cache, struct root_record, cache);
3276         while (!list_empty(&rec->backrefs)) {
3277                 backref = to_root_backref(rec->backrefs.next);
3278                 list_del(&backref->list);
3279                 free(backref);
3280         }
3281
3282         kfree(rec);
3283 }
3284
/*
 * Instantiate the whole-tree teardown helper for root_record caches, using
 * free_root_record() as the per-entry destructor.  Presumably this expands
 * to free_root_recs_tree(), which check_fs_roots() calls - confirm against
 * the FREE_EXTENT_CACHE_BASED_TREE definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3286
3287 static int add_root_backref(struct cache_tree *root_cache,
3288                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3289                             const char *name, int namelen,
3290                             int item_type, int errors)
3291 {
3292         struct root_record *rec;
3293         struct root_backref *backref;
3294
3295         rec = get_root_rec(root_cache, root_id);
3296         BUG_ON(IS_ERR(rec));
3297         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3298         BUG_ON(!backref);
3299
3300         backref->errors |= errors;
3301
3302         if (item_type != BTRFS_DIR_ITEM_KEY) {
3303                 if (backref->found_dir_index || backref->found_back_ref ||
3304                     backref->found_forward_ref) {
3305                         if (backref->index != index)
3306                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3307                 } else {
3308                         backref->index = index;
3309                 }
3310         }
3311
3312         if (item_type == BTRFS_DIR_ITEM_KEY) {
3313                 if (backref->found_forward_ref)
3314                         rec->found_ref++;
3315                 backref->found_dir_item = 1;
3316         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3317                 backref->found_dir_index = 1;
3318         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3319                 if (backref->found_forward_ref)
3320                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3321                 else if (backref->found_dir_item)
3322                         rec->found_ref++;
3323                 backref->found_forward_ref = 1;
3324         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3325                 if (backref->found_back_ref)
3326                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3327                 backref->found_back_ref = 1;
3328         } else {
3329                 BUG_ON(1);
3330         }
3331
3332         if (backref->found_forward_ref && backref->found_dir_item)
3333                 backref->reachable = 1;
3334         return 0;
3335 }
3336
3337 static int merge_root_recs(struct btrfs_root *root,
3338                            struct cache_tree *src_cache,
3339                            struct cache_tree *dst_cache)
3340 {
3341         struct cache_extent *cache;
3342         struct ptr_node *node;
3343         struct inode_record *rec;
3344         struct inode_backref *backref;
3345         int ret = 0;
3346
3347         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3348                 free_inode_recs_tree(src_cache);
3349                 return 0;
3350         }
3351
3352         while (1) {
3353                 cache = search_cache_extent(src_cache, 0);
3354                 if (!cache)
3355                         break;
3356                 node = container_of(cache, struct ptr_node, cache);
3357                 rec = node->data;
3358                 remove_cache_extent(src_cache, &node->cache);
3359                 free(node);
3360
3361                 ret = is_child_root(root, root->objectid, rec->ino);
3362                 if (ret < 0)
3363                         break;
3364                 else if (ret == 0)
3365                         goto skip;
3366
3367                 list_for_each_entry(backref, &rec->backrefs, list) {
3368                         BUG_ON(backref->found_inode_ref);
3369                         if (backref->found_dir_item)
3370                                 add_root_backref(dst_cache, rec->ino,
3371                                         root->root_key.objectid, backref->dir,
3372                                         backref->index, backref->name,
3373                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3374                                         backref->errors);
3375                         if (backref->found_dir_index)
3376                                 add_root_backref(dst_cache, rec->ino,
3377                                         root->root_key.objectid, backref->dir,
3378                                         backref->index, backref->name,
3379                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3380                                         backref->errors);
3381                 }
3382 skip:
3383                 free_inode_rec(rec);
3384         }
3385         if (ret < 0)
3386                 return ret;
3387         return 0;
3388 }
3389
3390 static int check_root_refs(struct btrfs_root *root,
3391                            struct cache_tree *root_cache)
3392 {
3393         struct root_record *rec;
3394         struct root_record *ref_root;
3395         struct root_backref *backref;
3396         struct cache_extent *cache;
3397         int loop = 1;
3398         int ret;
3399         int error;
3400         int errors = 0;
3401
3402         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3403         BUG_ON(IS_ERR(rec));
3404         rec->found_ref = 1;
3405
3406         /* fixme: this can not detect circular references */
3407         while (loop) {
3408                 loop = 0;
3409                 cache = search_cache_extent(root_cache, 0);
3410                 while (1) {
3411                         if (!cache)
3412                                 break;
3413                         rec = container_of(cache, struct root_record, cache);
3414                         cache = next_cache_extent(cache);
3415
3416                         if (rec->found_ref == 0)
3417                                 continue;
3418
3419                         list_for_each_entry(backref, &rec->backrefs, list) {
3420                                 if (!backref->reachable)
3421                                         continue;
3422
3423                                 ref_root = get_root_rec(root_cache,
3424                                                         backref->ref_root);
3425                                 BUG_ON(IS_ERR(ref_root));
3426                                 if (ref_root->found_ref > 0)
3427                                         continue;
3428
3429                                 backref->reachable = 0;
3430                                 rec->found_ref--;
3431                                 if (rec->found_ref == 0)
3432                                         loop = 1;
3433                         }
3434                 }
3435         }
3436
3437         cache = search_cache_extent(root_cache, 0);
3438         while (1) {
3439                 if (!cache)
3440                         break;
3441                 rec = container_of(cache, struct root_record, cache);
3442                 cache = next_cache_extent(cache);
3443
3444                 if (rec->found_ref == 0 &&
3445                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3446                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3447                         ret = check_orphan_item(root->fs_info->tree_root,
3448                                                 rec->objectid);
3449                         if (ret == 0)
3450                                 continue;
3451
3452                         /*
3453                          * If we don't have a root item then we likely just have
3454                          * a dir item in a snapshot for this root but no actual
3455                          * ref key or anything so it's meaningless.
3456                          */
3457                         if (!rec->found_root_item)
3458                                 continue;
3459                         errors++;
3460                         fprintf(stderr, "fs tree %llu not referenced\n",
3461                                 (unsigned long long)rec->objectid);
3462                 }
3463
3464                 error = 0;
3465                 if (rec->found_ref > 0 && !rec->found_root_item)
3466                         error = 1;
3467                 list_for_each_entry(backref, &rec->backrefs, list) {
3468                         if (!backref->found_dir_item)
3469                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3470                         if (!backref->found_dir_index)
3471                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3472                         if (!backref->found_back_ref)
3473                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3474                         if (!backref->found_forward_ref)
3475                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3476                         if (backref->reachable && backref->errors)
3477                                 error = 1;
3478                 }
3479                 if (!error)
3480                         continue;
3481
3482                 errors++;
3483                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3484                         (unsigned long long)rec->objectid, rec->found_ref,
3485                          rec->found_root_item ? "" : "not found");
3486
3487                 list_for_each_entry(backref, &rec->backrefs, list) {
3488                         if (!backref->reachable)
3489                                 continue;
3490                         if (!backref->errors && rec->found_root_item)
3491                                 continue;
3492                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3493                                 " index %llu namelen %u name %s errors %x\n",
3494                                 (unsigned long long)backref->ref_root,
3495                                 (unsigned long long)backref->dir,
3496                                 (unsigned long long)backref->index,
3497                                 backref->namelen, backref->name,
3498                                 backref->errors);
3499                         print_ref_error(backref->errors);
3500                 }
3501         }
3502         return errors > 0 ? 1 : 0;
3503 }
3504
3505 static int process_root_ref(struct extent_buffer *eb, int slot,
3506                             struct btrfs_key *key,
3507                             struct cache_tree *root_cache)
3508 {
3509         u64 dirid;
3510         u64 index;
3511         u32 len;
3512         u32 name_len;
3513         struct btrfs_root_ref *ref;
3514         char namebuf[BTRFS_NAME_LEN];
3515         int error;
3516
3517         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3518
3519         dirid = btrfs_root_ref_dirid(eb, ref);
3520         index = btrfs_root_ref_sequence(eb, ref);
3521         name_len = btrfs_root_ref_name_len(eb, ref);
3522
3523         if (name_len <= BTRFS_NAME_LEN) {
3524                 len = name_len;
3525                 error = 0;
3526         } else {
3527                 len = BTRFS_NAME_LEN;
3528                 error = REF_ERR_NAME_TOO_LONG;
3529         }
3530         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3531
3532         if (key->type == BTRFS_ROOT_REF_KEY) {
3533                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3534                                  index, namebuf, len, key->type, error);
3535         } else {
3536                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3537                                  index, namebuf, len, key->type, error);
3538         }
3539         return 0;
3540 }
3541
3542 static void free_corrupt_block(struct cache_extent *cache)
3543 {
3544         struct btrfs_corrupt_block *corrupt;
3545
3546         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3547         free(corrupt);
3548 }
3549
/*
 * Instantiate the whole-tree teardown helper for corrupt-block caches,
 * using free_corrupt_block() as the per-entry destructor.  Presumably this
 * expands to free_corrupt_blocks_tree(), which check_fs_root() calls -
 * confirm against the FREE_EXTENT_CACHE_BASED_TREE definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3551
3552 /*
3553  * Repair the btree of the given root.
3554  *
3555  * The fix is to remove the node key in corrupt_blocks cache_tree.
3556  * and rebalance the tree.
3557  * After the fix, the btree should be writeable.
3558  */
3559 static int repair_btree(struct btrfs_root *root,
3560                         struct cache_tree *corrupt_blocks)
3561 {
3562         struct btrfs_trans_handle *trans;
3563         struct btrfs_path *path;
3564         struct btrfs_corrupt_block *corrupt;
3565         struct cache_extent *cache;
3566         struct btrfs_key key;
3567         u64 offset;
3568         int level;
3569         int ret = 0;
3570
3571         if (cache_tree_empty(corrupt_blocks))
3572                 return 0;
3573
3574         path = btrfs_alloc_path();
3575         if (!path)
3576                 return -ENOMEM;
3577
3578         trans = btrfs_start_transaction(root, 1);
3579         if (IS_ERR(trans)) {
3580                 ret = PTR_ERR(trans);
3581                 fprintf(stderr, "Error starting transaction: %s\n",
3582                         strerror(-ret));
3583                 goto out_free_path;
3584         }
3585         cache = first_cache_extent(corrupt_blocks);
3586         while (cache) {
3587                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3588                                        cache);
3589                 level = corrupt->level;
3590                 path->lowest_level = level;
3591                 key.objectid = corrupt->key.objectid;
3592                 key.type = corrupt->key.type;
3593                 key.offset = corrupt->key.offset;
3594
3595                 /*
3596                  * Here we don't want to do any tree balance, since it may
3597                  * cause a balance with corrupted brother leaf/node,
3598                  * so ins_len set to 0 here.
3599                  * Balance will be done after all corrupt node/leaf is deleted.
3600                  */
3601                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3602                 if (ret < 0)
3603                         goto out;
3604                 offset = btrfs_node_blockptr(path->nodes[level],
3605                                              path->slots[level]);
3606
3607                 /* Remove the ptr */
3608                 ret = btrfs_del_ptr(trans, root, path, level,
3609                                     path->slots[level]);
3610                 if (ret < 0)
3611                         goto out;
3612                 /*
3613                  * Remove the corresponding extent
3614                  * return value is not concerned.
3615                  */
3616                 btrfs_release_path(path);
3617                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3618                                         0, root->root_key.objectid,
3619                                         level - 1, 0);
3620                 cache = next_cache_extent(cache);
3621         }
3622
3623         /* Balance the btree using btrfs_search_slot() */
3624         cache = first_cache_extent(corrupt_blocks);
3625         while (cache) {
3626                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3627                                        cache);
3628                 memcpy(&key, &corrupt->key, sizeof(key));
3629                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3630                 if (ret < 0)
3631                         goto out;
3632                 /* return will always >0 since it won't find the item */
3633                 ret = 0;
3634                 btrfs_release_path(path);
3635                 cache = next_cache_extent(cache);
3636         }
3637 out:
3638         btrfs_commit_transaction(trans, root);
3639 out_free_path:
3640         btrfs_free_path(path);
3641         return ret;
3642 }
3643
3644 static int check_fs_root(struct btrfs_root *root,
3645                          struct cache_tree *root_cache,
3646                          struct walk_control *wc)
3647 {
3648         int ret = 0;
3649         int err = 0;
3650         int wret;
3651         int level;
3652         struct btrfs_path path;
3653         struct shared_node root_node;
3654         struct root_record *rec;
3655         struct btrfs_root_item *root_item = &root->root_item;
3656         struct cache_tree corrupt_blocks;
3657         struct orphan_data_extent *orphan;
3658         struct orphan_data_extent *tmp;
3659         enum btrfs_tree_block_status status;
3660         struct node_refs nrefs;
3661
3662         /*
3663          * Reuse the corrupt_block cache tree to record corrupted tree block
3664          *
3665          * Unlike the usage in extent tree check, here we do it in a per
3666          * fs/subvol tree base.
3667          */
3668         cache_tree_init(&corrupt_blocks);
3669         root->fs_info->corrupt_blocks = &corrupt_blocks;
3670
3671         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3672                 rec = get_root_rec(root_cache, root->root_key.objectid);
3673                 BUG_ON(IS_ERR(rec));
3674                 if (btrfs_root_refs(root_item) > 0)
3675                         rec->found_root_item = 1;
3676         }
3677
3678         btrfs_init_path(&path);
3679         memset(&root_node, 0, sizeof(root_node));
3680         cache_tree_init(&root_node.root_cache);
3681         cache_tree_init(&root_node.inode_cache);
3682         memset(&nrefs, 0, sizeof(nrefs));
3683
3684         /* Move the orphan extent record to corresponding inode_record */
3685         list_for_each_entry_safe(orphan, tmp,
3686                                  &root->orphan_data_extents, list) {
3687                 struct inode_record *inode;
3688
3689                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3690                                       1);
3691                 BUG_ON(IS_ERR(inode));
3692                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3693                 list_move(&orphan->list, &inode->orphan_extents);
3694         }
3695
3696         level = btrfs_header_level(root->node);
3697         memset(wc->nodes, 0, sizeof(wc->nodes));
3698         wc->nodes[level] = &root_node;
3699         wc->active_node = level;
3700         wc->root_level = level;
3701
3702         /* We may not have checked the root block, lets do that now */
3703         if (btrfs_is_leaf(root->node))
3704                 status = btrfs_check_leaf(root, NULL, root->node);
3705         else
3706                 status = btrfs_check_node(root, NULL, root->node);
3707         if (status != BTRFS_TREE_BLOCK_CLEAN)
3708                 return -EIO;
3709
3710         if (btrfs_root_refs(root_item) > 0 ||
3711             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3712                 path.nodes[level] = root->node;
3713                 extent_buffer_get(root->node);
3714                 path.slots[level] = 0;
3715         } else {
3716                 struct btrfs_key key;
3717                 struct btrfs_disk_key found_key;
3718
3719                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3720                 level = root_item->drop_level;
3721                 path.lowest_level = level;
3722                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3723                 if (wret < 0)
3724                         goto skip_walking;
3725                 btrfs_node_key(path.nodes[level], &found_key,
3726                                 path.slots[level]);
3727                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3728                                         sizeof(found_key)));
3729         }
3730
3731         while (1) {
3732                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3733                 if (wret < 0)
3734                         ret = wret;
3735                 if (wret != 0)
3736                         break;
3737
3738                 wret = walk_up_tree(root, &path, wc, &level);
3739                 if (wret < 0)
3740                         ret = wret;
3741                 if (wret != 0)
3742                         break;
3743         }
3744 skip_walking:
3745         btrfs_release_path(&path);
3746
3747         if (!cache_tree_empty(&corrupt_blocks)) {
3748                 struct cache_extent *cache;
3749                 struct btrfs_corrupt_block *corrupt;
3750
3751                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3752                        root->root_key.objectid);
3753                 cache = first_cache_extent(&corrupt_blocks);
3754                 while (cache) {
3755                         corrupt = container_of(cache,
3756                                                struct btrfs_corrupt_block,
3757                                                cache);
3758                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3759                                cache->start, corrupt->level,
3760                                corrupt->key.objectid, corrupt->key.type,
3761                                corrupt->key.offset);
3762                         cache = next_cache_extent(cache);
3763                 }
3764                 if (repair) {
3765                         printf("Try to repair the btree for root %llu\n",
3766                                root->root_key.objectid);
3767                         ret = repair_btree(root, &corrupt_blocks);
3768                         if (ret < 0)
3769                                 fprintf(stderr, "Failed to repair btree: %s\n",
3770                                         strerror(-ret));
3771                         if (!ret)
3772                                 printf("Btree for root %llu is fixed\n",
3773                                        root->root_key.objectid);
3774                 }
3775         }
3776
3777         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3778         if (err < 0)
3779                 ret = err;
3780
3781         if (root_node.current) {
3782                 root_node.current->checked = 1;
3783                 maybe_free_inode_rec(&root_node.inode_cache,
3784                                 root_node.current);
3785         }
3786
3787         err = check_inode_recs(root, &root_node.inode_cache);
3788         if (!ret)
3789                 ret = err;
3790
3791         free_corrupt_blocks_tree(&corrupt_blocks);
3792         root->fs_info->corrupt_blocks = NULL;
3793         free_orphan_data_extents(&root->orphan_data_extents);
3794         return ret;
3795 }
3796
3797 static int fs_root_objectid(u64 objectid)
3798 {
3799         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3800             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3801                 return 1;
3802         return is_fstree(objectid);
3803 }
3804
3805 static int check_fs_roots(struct btrfs_root *root,
3806                           struct cache_tree *root_cache)
3807 {
3808         struct btrfs_path path;
3809         struct btrfs_key key;
3810         struct walk_control wc;
3811         struct extent_buffer *leaf, *tree_node;
3812         struct btrfs_root *tmp_root;
3813         struct btrfs_root *tree_root = root->fs_info->tree_root;
3814         int ret;
3815         int err = 0;
3816
3817         if (ctx.progress_enabled) {
3818                 ctx.tp = TASK_FS_ROOTS;
3819                 task_start(ctx.info);
3820         }
3821
3822         /*
3823          * Just in case we made any changes to the extent tree that weren't
3824          * reflected into the free space cache yet.
3825          */
3826         if (repair)
3827                 reset_cached_block_groups(root->fs_info);
3828         memset(&wc, 0, sizeof(wc));
3829         cache_tree_init(&wc.shared);
3830         btrfs_init_path(&path);
3831
3832 again:
3833         key.offset = 0;
3834         key.objectid = 0;
3835         key.type = BTRFS_ROOT_ITEM_KEY;
3836         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3837         if (ret < 0) {
3838                 err = 1;
3839                 goto out;
3840         }
3841         tree_node = tree_root->node;
3842         while (1) {
3843                 if (tree_node != tree_root->node) {
3844                         free_root_recs_tree(root_cache);
3845                         btrfs_release_path(&path);
3846                         goto again;
3847                 }
3848                 leaf = path.nodes[0];
3849                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3850                         ret = btrfs_next_leaf(tree_root, &path);
3851                         if (ret) {
3852                                 if (ret < 0)
3853                                         err = 1;
3854                                 break;
3855                         }
3856                         leaf = path.nodes[0];
3857                 }
3858                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3859                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3860                     fs_root_objectid(key.objectid)) {
3861                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3862                                 tmp_root = btrfs_read_fs_root_no_cache(
3863                                                 root->fs_info, &key);
3864                         } else {
3865                                 key.offset = (u64)-1;
3866                                 tmp_root = btrfs_read_fs_root(
3867                                                 root->fs_info, &key);
3868                         }
3869                         if (IS_ERR(tmp_root)) {
3870                                 err = 1;
3871                                 goto next;
3872                         }
3873                         ret = check_fs_root(tmp_root, root_cache, &wc);
3874                         if (ret == -EAGAIN) {
3875                                 free_root_recs_tree(root_cache);
3876                                 btrfs_release_path(&path);
3877                                 goto again;
3878                         }
3879                         if (ret)
3880                                 err = 1;
3881                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3882                                 btrfs_free_fs_root(tmp_root);
3883                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3884                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3885                         process_root_ref(leaf, path.slots[0], &key,
3886                                          root_cache);
3887                 }
3888 next:
3889                 path.slots[0]++;
3890         }
3891 out:
3892         btrfs_release_path(&path);
3893         if (err)
3894                 free_extent_cache_tree(&wc.shared);
3895         if (!cache_tree_empty(&wc.shared))
3896                 fprintf(stderr, "warning line %d\n", __LINE__);
3897
3898         task_stop(ctx.info);
3899
3900         return err;
3901 }
3902
3903 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3904 {
3905         struct rb_node *n;
3906         struct extent_backref *back;
3907         struct tree_backref *tback;
3908         struct data_backref *dback;
3909         u64 found = 0;
3910         int err = 0;
3911
3912         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3913                 back = rb_node_to_extent_backref(n);
3914                 if (!back->found_extent_tree) {
3915                         err = 1;
3916                         if (!print_errs)
3917                                 goto out;
3918                         if (back->is_data) {
3919                                 dback = to_data_backref(back);
3920                                 fprintf(stderr, "Backref %llu %s %llu"
3921                                         " owner %llu offset %llu num_refs %lu"
3922                                         " not found in extent tree\n",
3923                                         (unsigned long long)rec->start,
3924                                         back->full_backref ?
3925                                         "parent" : "root",
3926                                         back->full_backref ?
3927                                         (unsigned long long)dback->parent:
3928                                         (unsigned long long)dback->root,
3929                                         (unsigned long long)dback->owner,
3930                                         (unsigned long long)dback->offset,
3931                                         (unsigned long)dback->num_refs);
3932                         } else {
3933                                 tback = to_tree_backref(back);
3934                                 fprintf(stderr, "Backref %llu parent %llu"
3935                                         " root %llu not found in extent tree\n",
3936                                         (unsigned long long)rec->start,
3937                                         (unsigned long long)tback->parent,
3938                                         (unsigned long long)tback->root);
3939                         }
3940                 }
3941                 if (!back->is_data && !back->found_ref) {
3942                         err = 1;
3943                         if (!print_errs)
3944                                 goto out;
3945                         tback = to_tree_backref(back);
3946                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3947                                 (unsigned long long)rec->start,
3948                                 back->full_backref ? "parent" : "root",
3949                                 back->full_backref ?
3950                                 (unsigned long long)tback->parent :
3951                                 (unsigned long long)tback->root, back);
3952                 }
3953                 if (back->is_data) {
3954                         dback = to_data_backref(back);
3955                         if (dback->found_ref != dback->num_refs) {
3956                                 err = 1;
3957                                 if (!print_errs)
3958                                         goto out;
3959                                 fprintf(stderr, "Incorrect local backref count"
3960                                         " on %llu %s %llu owner %llu"
3961                                         " offset %llu found %u wanted %u back %p\n",
3962                                         (unsigned long long)rec->start,
3963                                         back->full_backref ?
3964                                         "parent" : "root",
3965                                         back->full_backref ?
3966                                         (unsigned long long)dback->parent:
3967                                         (unsigned long long)dback->root,
3968                                         (unsigned long long)dback->owner,
3969                                         (unsigned long long)dback->offset,
3970                                         dback->found_ref, dback->num_refs, back);
3971                         }
3972                         if (dback->disk_bytenr != rec->start) {
3973                                 err = 1;
3974                                 if (!print_errs)
3975                                         goto out;
3976                                 fprintf(stderr, "Backref disk bytenr does not"
3977                                         " match extent record, bytenr=%llu, "
3978                                         "ref bytenr=%llu\n",
3979                                         (unsigned long long)rec->start,
3980                                         (unsigned long long)dback->disk_bytenr);
3981                         }
3982
3983                         if (dback->bytes != rec->nr) {
3984                                 err = 1;
3985                                 if (!print_errs)
3986                                         goto out;
3987                                 fprintf(stderr, "Backref bytes do not match "
3988                                         "extent backref, bytenr=%llu, ref "
3989                                         "bytes=%llu, backref bytes=%llu\n",
3990                                         (unsigned long long)rec->start,
3991                                         (unsigned long long)rec->nr,
3992                                         (unsigned long long)dback->bytes);
3993                         }
3994                 }
3995                 if (!back->is_data) {
3996                         found += 1;
3997                 } else {
3998                         dback = to_data_backref(back);
3999                         found += dback->found_ref;
4000                 }
4001         }
4002         if (found != rec->refs) {
4003                 err = 1;
4004                 if (!print_errs)
4005                         goto out;
4006                 fprintf(stderr, "Incorrect global backref count "
4007                         "on %llu found %llu wanted %llu\n",
4008                         (unsigned long long)rec->start,
4009                         (unsigned long long)found,
4010                         (unsigned long long)rec->refs);
4011         }
4012 out:
4013         return err;
4014 }
4015
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
4022
4023 static void free_all_extent_backrefs(struct extent_record *rec)
4024 {
4025         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4026 }
4027
4028 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4029                                      struct cache_tree *extent_cache)
4030 {
4031         struct cache_extent *cache;
4032         struct extent_record *rec;
4033
4034         while (1) {
4035                 cache = first_cache_extent(extent_cache);
4036                 if (!cache)
4037                         break;
4038                 rec = container_of(cache, struct extent_record, cache);
4039                 remove_cache_extent(extent_cache, cache);
4040                 free_all_extent_backrefs(rec);
4041                 free(rec);
4042         }
4043 }
4044
4045 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4046                                  struct extent_record *rec)
4047 {
4048         if (rec->content_checked && rec->owner_ref_checked &&
4049             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4050             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4051             !rec->bad_full_backref && !rec->crossing_stripes &&
4052             !rec->wrong_chunk_type) {
4053                 remove_cache_extent(extent_cache, &rec->cache);
4054                 free_all_extent_backrefs(rec);
4055                 list_del_init(&rec->list);
4056                 free(rec);
4057         }
4058         return 0;
4059 }
4060
4061 static int check_owner_ref(struct btrfs_root *root,
4062                             struct extent_record *rec,
4063                             struct extent_buffer *buf)
4064 {
4065         struct extent_backref *node, *tmp;
4066         struct tree_backref *back;
4067         struct btrfs_root *ref_root;
4068         struct btrfs_key key;
4069         struct btrfs_path path;
4070         struct extent_buffer *parent;
4071         int level;
4072         int found = 0;
4073         int ret;
4074
4075         rbtree_postorder_for_each_entry_safe(node, tmp,
4076                                              &rec->backref_tree, node) {
4077                 if (node->is_data)
4078                         continue;
4079                 if (!node->found_ref)
4080                         continue;
4081                 if (node->full_backref)
4082                         continue;
4083                 back = to_tree_backref(node);
4084                 if (btrfs_header_owner(buf) == back->root)
4085                         return 0;
4086         }
4087         BUG_ON(rec->is_root);
4088
4089         /* try to find the block by search corresponding fs tree */
4090         key.objectid = btrfs_header_owner(buf);
4091         key.type = BTRFS_ROOT_ITEM_KEY;
4092         key.offset = (u64)-1;
4093
4094         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4095         if (IS_ERR(ref_root))
4096                 return 1;
4097
4098         level = btrfs_header_level(buf);
4099         if (level == 0)
4100                 btrfs_item_key_to_cpu(buf, &key, 0);
4101         else
4102                 btrfs_node_key_to_cpu(buf, &key, 0);
4103
4104         btrfs_init_path(&path);
4105         path.lowest_level = level + 1;
4106         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4107         if (ret < 0)
4108                 return 0;
4109
4110         parent = path.nodes[level + 1];
4111         if (parent && buf->start == btrfs_node_blockptr(parent,
4112                                                         path.slots[level + 1]))
4113                 found = 1;
4114
4115         btrfs_release_path(&path);
4116         return found ? 0 : 1;
4117 }
4118
4119 static int is_extent_tree_record(struct extent_record *rec)
4120 {
4121         struct extent_backref *ref, *tmp;
4122         struct tree_backref *back;
4123         int is_extent = 0;
4124
4125         rbtree_postorder_for_each_entry_safe(ref, tmp,
4126                                              &rec->backref_tree, node) {
4127                 if (ref->is_data)
4128                         return 0;
4129                 back = to_tree_backref(ref);
4130                 if (ref->full_backref)
4131                         return 0;
4132                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4133                         is_extent = 1;
4134         }
4135         return is_extent;
4136 }
4137
4138
4139 static int record_bad_block_io(struct btrfs_fs_info *info,
4140                                struct cache_tree *extent_cache,
4141                                u64 start, u64 len)
4142 {
4143         struct extent_record *rec;
4144         struct cache_extent *cache;
4145         struct btrfs_key key;
4146
4147         cache = lookup_cache_extent(extent_cache, start, len);
4148         if (!cache)
4149                 return 0;
4150
4151         rec = container_of(cache, struct extent_record, cache);
4152         if (!is_extent_tree_record(rec))
4153                 return 0;
4154
4155         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4156         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4157 }
4158
4159 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4160                        struct extent_buffer *buf, int slot)
4161 {
4162         if (btrfs_header_level(buf)) {
4163                 struct btrfs_key_ptr ptr1, ptr2;
4164
4165                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4166                                    sizeof(struct btrfs_key_ptr));
4167                 read_extent_buffer(buf, &ptr2,
4168                                    btrfs_node_key_ptr_offset(slot + 1),
4169                                    sizeof(struct btrfs_key_ptr));
4170                 write_extent_buffer(buf, &ptr1,
4171                                     btrfs_node_key_ptr_offset(slot + 1),
4172                                     sizeof(struct btrfs_key_ptr));
4173                 write_extent_buffer(buf, &ptr2,
4174                                     btrfs_node_key_ptr_offset(slot),
4175                                     sizeof(struct btrfs_key_ptr));
4176                 if (slot == 0) {
4177                         struct btrfs_disk_key key;
4178                         btrfs_node_key(buf, &key, 0);
4179                         btrfs_fixup_low_keys(root, path, &key,
4180                                              btrfs_header_level(buf) + 1);
4181                 }
4182         } else {
4183                 struct btrfs_item *item1, *item2;
4184                 struct btrfs_key k1, k2;
4185                 char *item1_data, *item2_data;
4186                 u32 item1_offset, item2_offset, item1_size, item2_size;
4187
4188                 item1 = btrfs_item_nr(slot);
4189                 item2 = btrfs_item_nr(slot + 1);
4190                 btrfs_item_key_to_cpu(buf, &k1, slot);
4191                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4192                 item1_offset = btrfs_item_offset(buf, item1);
4193                 item2_offset = btrfs_item_offset(buf, item2);
4194                 item1_size = btrfs_item_size(buf, item1);
4195                 item2_size = btrfs_item_size(buf, item2);
4196
4197                 item1_data = malloc(item1_size);
4198                 if (!item1_data)
4199                         return -ENOMEM;
4200                 item2_data = malloc(item2_size);
4201                 if (!item2_data) {
4202                         free(item1_data);
4203                         return -ENOMEM;
4204                 }
4205
4206                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4207                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4208
4209                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4210                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4211                 free(item1_data);
4212                 free(item2_data);
4213
4214                 btrfs_set_item_offset(buf, item1, item2_offset);
4215                 btrfs_set_item_offset(buf, item2, item1_offset);
4216                 btrfs_set_item_size(buf, item1, item2_size);
4217                 btrfs_set_item_size(buf, item2, item1_size);
4218
4219                 path->slots[0] = slot;
4220                 btrfs_set_item_key_unsafe(root, path, &k2);
4221                 path->slots[0] = slot + 1;
4222                 btrfs_set_item_key_unsafe(root, path, &k1);
4223         }
4224         return 0;
4225 }
4226
4227 static int fix_key_order(struct btrfs_trans_handle *trans,
4228                          struct btrfs_root *root,
4229                          struct btrfs_path *path)
4230 {
4231         struct extent_buffer *buf;
4232         struct btrfs_key k1, k2;
4233         int i;
4234         int level = path->lowest_level;
4235         int ret = -EIO;
4236
4237         buf = path->nodes[level];
4238         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4239                 if (level) {
4240                         btrfs_node_key_to_cpu(buf, &k1, i);
4241                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4242                 } else {
4243                         btrfs_item_key_to_cpu(buf, &k1, i);
4244                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4245                 }
4246                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4247                         continue;
4248                 ret = swap_values(root, path, buf, i);
4249                 if (ret)
4250                         break;
4251                 btrfs_mark_buffer_dirty(buf);
4252                 i = 0;
4253         }
4254         return ret;
4255 }
4256
4257 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4258                              struct btrfs_root *root,
4259                              struct btrfs_path *path,
4260                              struct extent_buffer *buf, int slot)
4261 {
4262         struct btrfs_key key;
4263         int nritems = btrfs_header_nritems(buf);
4264
4265         btrfs_item_key_to_cpu(buf, &key, slot);
4266
4267         /* These are all the keys we can deal with missing. */
4268         if (key.type != BTRFS_DIR_INDEX_KEY &&
4269             key.type != BTRFS_EXTENT_ITEM_KEY &&
4270             key.type != BTRFS_METADATA_ITEM_KEY &&
4271             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4272             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4273                 return -1;
4274
4275         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4276                (unsigned long long)key.objectid, key.type,
4277                (unsigned long long)key.offset, slot, buf->start);
4278         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4279                               btrfs_item_nr_offset(slot + 1),
4280                               sizeof(struct btrfs_item) *
4281                               (nritems - slot - 1));
4282         btrfs_set_header_nritems(buf, nritems - 1);
4283         if (slot == 0) {
4284                 struct btrfs_disk_key disk_key;
4285
4286                 btrfs_item_key(buf, &disk_key, 0);
4287                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4288         }
4289         btrfs_mark_buffer_dirty(buf);
4290         return 0;
4291 }
4292
4293 static int fix_item_offset(struct btrfs_trans_handle *trans,
4294                            struct btrfs_root *root,
4295                            struct btrfs_path *path)
4296 {
4297         struct extent_buffer *buf;
4298         int i;
4299         int ret = 0;
4300
4301         /* We should only get this for leaves */
4302         BUG_ON(path->lowest_level);
4303         buf = path->nodes[0];
4304 again:
4305         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4306                 unsigned int shift = 0, offset;
4307
4308                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4309                     BTRFS_LEAF_DATA_SIZE(root)) {
4310                         if (btrfs_item_end_nr(buf, i) >
4311                             BTRFS_LEAF_DATA_SIZE(root)) {
4312                                 ret = delete_bogus_item(trans, root, path,
4313                                                         buf, i);
4314                                 if (!ret)
4315                                         goto again;
4316                                 fprintf(stderr, "item is off the end of the "
4317                                         "leaf, can't fix\n");
4318                                 ret = -EIO;
4319                                 break;
4320                         }
4321                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4322                                 btrfs_item_end_nr(buf, i);
4323                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4324                            btrfs_item_offset_nr(buf, i - 1)) {
4325                         if (btrfs_item_end_nr(buf, i) >
4326                             btrfs_item_offset_nr(buf, i - 1)) {
4327                                 ret = delete_bogus_item(trans, root, path,
4328                                                         buf, i);
4329                                 if (!ret)
4330                                         goto again;
4331                                 fprintf(stderr, "items overlap, can't fix\n");
4332                                 ret = -EIO;
4333                                 break;
4334                         }
4335                         shift = btrfs_item_offset_nr(buf, i - 1) -
4336                                 btrfs_item_end_nr(buf, i);
4337                 }
4338                 if (!shift)
4339                         continue;
4340
4341                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4342                        i, shift, (unsigned long long)buf->start);
4343                 offset = btrfs_item_offset_nr(buf, i);
4344                 memmove_extent_buffer(buf,
4345                                       btrfs_leaf_data(buf) + offset + shift,
4346                                       btrfs_leaf_data(buf) + offset,
4347                                       btrfs_item_size_nr(buf, i));
4348                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4349                                       offset + shift);
4350                 btrfs_mark_buffer_dirty(buf);
4351         }
4352
4353         /*
4354          * We may have moved things, in which case we want to exit so we don't
4355          * write those changes out.  Once we have proper abort functionality in
4356          * progs this can be changed to something nicer.
4357          */
4358         BUG_ON(ret);
4359         return ret;
4360 }
4361
4362 /*
4363  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4364  * then just return -EIO.
4365  */
4366 static int try_to_fix_bad_block(struct btrfs_root *root,
4367                                 struct extent_buffer *buf,
4368                                 enum btrfs_tree_block_status status)
4369 {
4370         struct btrfs_trans_handle *trans;
4371         struct ulist *roots;
4372         struct ulist_node *node;
4373         struct btrfs_root *search_root;
4374         struct btrfs_path *path;
4375         struct ulist_iterator iter;
4376         struct btrfs_key root_key, key;
4377         int ret;
4378
4379         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4380             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4381                 return -EIO;
4382
4383         path = btrfs_alloc_path();
4384         if (!path)
4385                 return -EIO;
4386
4387         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4388                                    0, &roots);
4389         if (ret) {
4390                 btrfs_free_path(path);
4391                 return -EIO;
4392         }
4393
4394         ULIST_ITER_INIT(&iter);
4395         while ((node = ulist_next(roots, &iter))) {
4396                 root_key.objectid = node->val;
4397                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4398                 root_key.offset = (u64)-1;
4399
4400                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4401                 if (IS_ERR(root)) {
4402                         ret = -EIO;
4403                         break;
4404                 }
4405
4406
4407                 trans = btrfs_start_transaction(search_root, 0);
4408                 if (IS_ERR(trans)) {
4409                         ret = PTR_ERR(trans);
4410                         break;
4411                 }
4412
4413                 path->lowest_level = btrfs_header_level(buf);
4414                 path->skip_check_block = 1;
4415                 if (path->lowest_level)
4416                         btrfs_node_key_to_cpu(buf, &key, 0);
4417                 else
4418                         btrfs_item_key_to_cpu(buf, &key, 0);
4419                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4420                 if (ret) {
4421                         ret = -EIO;
4422                         btrfs_commit_transaction(trans, search_root);
4423                         break;
4424                 }
4425                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4426                         ret = fix_key_order(trans, search_root, path);
4427                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4428                         ret = fix_item_offset(trans, search_root, path);
4429                 if (ret) {
4430                         btrfs_commit_transaction(trans, search_root);
4431                         break;
4432                 }
4433                 btrfs_release_path(path);
4434                 btrfs_commit_transaction(trans, search_root);
4435         }
4436         ulist_free(roots);
4437         btrfs_free_path(path);
4438         return ret;
4439 }
4440
4441 static int check_block(struct btrfs_root *root,
4442                        struct cache_tree *extent_cache,
4443                        struct extent_buffer *buf, u64 flags)
4444 {
4445         struct extent_record *rec;
4446         struct cache_extent *cache;
4447         struct btrfs_key key;
4448         enum btrfs_tree_block_status status;
4449         int ret = 0;
4450         int level;
4451
4452         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4453         if (!cache)
4454                 return 1;
4455         rec = container_of(cache, struct extent_record, cache);
4456         rec->generation = btrfs_header_generation(buf);
4457
4458         level = btrfs_header_level(buf);
4459         if (btrfs_header_nritems(buf) > 0) {
4460
4461                 if (level == 0)
4462                         btrfs_item_key_to_cpu(buf, &key, 0);
4463                 else
4464                         btrfs_node_key_to_cpu(buf, &key, 0);
4465
4466                 rec->info_objectid = key.objectid;
4467         }
4468         rec->info_level = level;
4469
4470         if (btrfs_is_leaf(buf))
4471                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4472         else
4473                 status = btrfs_check_node(root, &rec->parent_key, buf);
4474
4475         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4476                 if (repair)
4477                         status = try_to_fix_bad_block(root, buf, status);
4478                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4479                         ret = -EIO;
4480                         fprintf(stderr, "bad block %llu\n",
4481                                 (unsigned long long)buf->start);
4482                 } else {
4483                         /*
4484                          * Signal to callers we need to start the scan over
4485                          * again since we'll have cowed blocks.
4486                          */
4487                         ret = -EAGAIN;
4488                 }
4489         } else {
4490                 rec->content_checked = 1;
4491                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4492                         rec->owner_ref_checked = 1;
4493                 else {
4494                         ret = check_owner_ref(root, rec, buf);
4495                         if (!ret)
4496                                 rec->owner_ref_checked = 1;
4497                 }
4498         }
4499         if (!ret)
4500                 maybe_free_extent_rec(extent_cache, rec);
4501         return ret;
4502 }
4503
4504
4505 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4506                                                 u64 parent, u64 root)
4507 {
4508         struct rb_node *node;
4509         struct tree_backref *back = NULL;
4510         struct tree_backref match = {
4511                 .node = {
4512                         .is_data = 0,
4513                 },
4514         };
4515
4516         if (parent) {
4517                 match.parent = parent;
4518                 match.node.full_backref = 1;
4519         } else {
4520                 match.root = root;
4521         }
4522
4523         node = rb_search(&rec->backref_tree, &match.node.node,
4524                          (rb_compare_keys)compare_extent_backref, NULL);
4525         if (node)
4526                 back = to_tree_backref(rb_node_to_extent_backref(node));
4527
4528         return back;
4529 }
4530
4531 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4532                                                 u64 parent, u64 root)
4533 {
4534         struct tree_backref *ref = malloc(sizeof(*ref));
4535
4536         if (!ref)
4537                 return NULL;
4538         memset(&ref->node, 0, sizeof(ref->node));
4539         if (parent > 0) {
4540                 ref->parent = parent;
4541                 ref->node.full_backref = 1;
4542         } else {
4543                 ref->root = root;
4544                 ref->node.full_backref = 0;
4545         }
4546         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4547
4548         return ref;
4549 }
4550
4551 static struct data_backref *find_data_backref(struct extent_record *rec,
4552                                                 u64 parent, u64 root,
4553                                                 u64 owner, u64 offset,
4554                                                 int found_ref,
4555                                                 u64 disk_bytenr, u64 bytes)
4556 {
4557         struct rb_node *node;
4558         struct data_backref *back = NULL;
4559         struct data_backref match = {
4560                 .node = {
4561                         .is_data = 1,
4562                 },
4563                 .owner = owner,
4564                 .offset = offset,
4565                 .bytes = bytes,
4566                 .found_ref = found_ref,
4567                 .disk_bytenr = disk_bytenr,
4568         };
4569
4570         if (parent) {
4571                 match.parent = parent;
4572                 match.node.full_backref = 1;
4573         } else {
4574                 match.root = root;
4575         }
4576
4577         node = rb_search(&rec->backref_tree, &match.node.node,
4578                          (rb_compare_keys)compare_extent_backref, NULL);
4579         if (node)
4580                 back = to_data_backref(rb_node_to_extent_backref(node));
4581
4582         return back;
4583 }
4584
4585 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4586                                                 u64 parent, u64 root,
4587                                                 u64 owner, u64 offset,
4588                                                 u64 max_size)
4589 {
4590         struct data_backref *ref = malloc(sizeof(*ref));
4591
4592         if (!ref)
4593                 return NULL;
4594         memset(&ref->node, 0, sizeof(ref->node));
4595         ref->node.is_data = 1;
4596
4597         if (parent > 0) {
4598                 ref->parent = parent;
4599                 ref->owner = 0;
4600                 ref->offset = 0;
4601                 ref->node.full_backref = 1;
4602         } else {
4603                 ref->root = root;
4604                 ref->owner = owner;
4605                 ref->offset = offset;
4606                 ref->node.full_backref = 0;
4607         }
4608         ref->bytes = max_size;
4609         ref->found_ref = 0;
4610         ref->num_refs = 0;
4611         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4612         if (max_size > rec->max_size)
4613                 rec->max_size = max_size;
4614         return ref;
4615 }
4616
4617 /* Check if the type of extent matches with its chunk */
4618 static void check_extent_type(struct extent_record *rec)
4619 {
4620         struct btrfs_block_group_cache *bg_cache;
4621
4622         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4623         if (!bg_cache)
4624                 return;
4625
4626         /* data extent, check chunk directly*/
4627         if (!rec->metadata) {
4628                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4629                         rec->wrong_chunk_type = 1;
4630                 return;
4631         }
4632
4633         /* metadata extent, check the obvious case first */
4634         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4635                                  BTRFS_BLOCK_GROUP_METADATA))) {
4636                 rec->wrong_chunk_type = 1;
4637                 return;
4638         }
4639
4640         /*
4641          * Check SYSTEM extent, as it's also marked as metadata, we can only
4642          * make sure it's a SYSTEM extent by its backref
4643          */
4644         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4645                 struct extent_backref *node;
4646                 struct tree_backref *tback;
4647                 u64 bg_type;
4648
4649                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4650                 if (node->is_data) {
4651                         /* tree block shouldn't have data backref */
4652                         rec->wrong_chunk_type = 1;
4653                         return;
4654                 }
4655                 tback = container_of(node, struct tree_backref, node);
4656
4657                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4658                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4659                 else
4660                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4661                 if (!(bg_cache->flags & bg_type))
4662                         rec->wrong_chunk_type = 1;
4663         }
4664 }
4665
4666 /*
4667  * Allocate a new extent record, fill default values from @tmpl and insert int
4668  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4669  * the cache, otherwise it fails.
4670  */
4671 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4672                 struct extent_record *tmpl)
4673 {
4674         struct extent_record *rec;
4675         int ret = 0;
4676
4677         rec = malloc(sizeof(*rec));
4678         if (!rec)
4679                 return -ENOMEM;
4680         rec->start = tmpl->start;
4681         rec->max_size = tmpl->max_size;
4682         rec->nr = max(tmpl->nr, tmpl->max_size);
4683         rec->found_rec = tmpl->found_rec;
4684         rec->content_checked = tmpl->content_checked;
4685         rec->owner_ref_checked = tmpl->owner_ref_checked;
4686         rec->num_duplicates = 0;
4687         rec->metadata = tmpl->metadata;
4688         rec->flag_block_full_backref = FLAG_UNSET;
4689         rec->bad_full_backref = 0;
4690         rec->crossing_stripes = 0;
4691         rec->wrong_chunk_type = 0;
4692         rec->is_root = tmpl->is_root;
4693         rec->refs = tmpl->refs;
4694         rec->extent_item_refs = tmpl->extent_item_refs;
4695         rec->parent_generation = tmpl->parent_generation;
4696         INIT_LIST_HEAD(&rec->backrefs);
4697         INIT_LIST_HEAD(&rec->dups);
4698         INIT_LIST_HEAD(&rec->list);
4699         rec->backref_tree = RB_ROOT;
4700         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4701         rec->cache.start = tmpl->start;
4702         rec->cache.size = tmpl->nr;
4703         ret = insert_cache_extent(extent_cache, &rec->cache);
4704         BUG_ON(ret);
4705         bytes_used += rec->nr;
4706
4707         if (tmpl->metadata)
4708                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4709                                 global_info->tree_root->nodesize);
4710         check_extent_type(rec);
4711         return ret;
4712 }
4713
4714 /*
4715  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4716  * some are hints:
4717  * - refs              - if found, increase refs
4718  * - is_root           - if found, set
4719  * - content_checked   - if found, set
4720  * - owner_ref_checked - if found, set
4721  *
4722  * If not found, create a new one, initialize and insert.
4723  */
4724 static int add_extent_rec(struct cache_tree *extent_cache,
4725                 struct extent_record *tmpl)
4726 {
4727         struct extent_record *rec;
4728         struct cache_extent *cache;
4729         int ret = 0;
4730         int dup = 0;
4731
4732         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4733         if (cache) {
4734                 rec = container_of(cache, struct extent_record, cache);
4735                 if (tmpl->refs)
4736                         rec->refs++;
4737                 if (rec->nr == 1)
4738                         rec->nr = max(tmpl->nr, tmpl->max_size);
4739
4740                 /*
4741                  * We need to make sure to reset nr to whatever the extent
4742                  * record says was the real size, this way we can compare it to
4743                  * the backrefs.
4744                  */
4745                 if (tmpl->found_rec) {
4746                         if (tmpl->start != rec->start || rec->found_rec) {
4747                                 struct extent_record *tmp;
4748
4749                                 dup = 1;
4750                                 if (list_empty(&rec->list))
4751                                         list_add_tail(&rec->list,
4752                                                       &duplicate_extents);
4753
4754                                 /*
4755                                  * We have to do this song and dance in case we
4756                                  * find an extent record that falls inside of
4757                                  * our current extent record but does not have
4758                                  * the same objectid.
4759                                  */
4760                                 tmp = malloc(sizeof(*tmp));
4761                                 if (!tmp)
4762                                         return -ENOMEM;
4763                                 tmp->start = tmpl->start;
4764                                 tmp->max_size = tmpl->max_size;
4765                                 tmp->nr = tmpl->nr;
4766                                 tmp->found_rec = 1;
4767                                 tmp->metadata = tmpl->metadata;
4768                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4769                                 INIT_LIST_HEAD(&tmp->list);
4770                                 list_add_tail(&tmp->list, &rec->dups);
4771                                 rec->num_duplicates++;
4772                         } else {
4773                                 rec->nr = tmpl->nr;
4774                                 rec->found_rec = 1;
4775                         }
4776                 }
4777
4778                 if (tmpl->extent_item_refs && !dup) {
4779                         if (rec->extent_item_refs) {
4780                                 fprintf(stderr, "block %llu rec "
4781                                         "extent_item_refs %llu, passed %llu\n",
4782                                         (unsigned long long)tmpl->start,
4783                                         (unsigned long long)
4784                                                         rec->extent_item_refs,
4785                                         (unsigned long long)tmpl->extent_item_refs);
4786                         }
4787                         rec->extent_item_refs = tmpl->extent_item_refs;
4788                 }
4789                 if (tmpl->is_root)
4790                         rec->is_root = 1;
4791                 if (tmpl->content_checked)
4792                         rec->content_checked = 1;
4793                 if (tmpl->owner_ref_checked)
4794                         rec->owner_ref_checked = 1;
4795                 memcpy(&rec->parent_key, &tmpl->parent_key,
4796                                 sizeof(tmpl->parent_key));
4797                 if (tmpl->parent_generation)
4798                         rec->parent_generation = tmpl->parent_generation;
4799                 if (rec->max_size < tmpl->max_size)
4800                         rec->max_size = tmpl->max_size;
4801
4802                 /*
4803                  * A metadata extent can't cross stripe_len boundary, otherwise
4804                  * kernel scrub won't be able to handle it.
4805                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4806                  * it.
4807                  */
4808                 if (tmpl->metadata)
4809                         rec->crossing_stripes = check_crossing_stripes(
4810                                 rec->start, global_info->tree_root->nodesize);
4811                 check_extent_type(rec);
4812                 maybe_free_extent_rec(extent_cache, rec);
4813                 return ret;
4814         }
4815
4816         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4817
4818         return ret;
4819 }
4820
4821 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4822                             u64 parent, u64 root, int found_ref)
4823 {
4824         struct extent_record *rec;
4825         struct tree_backref *back;
4826         struct cache_extent *cache;
4827
4828         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4829         if (!cache) {
4830                 struct extent_record tmpl;
4831
4832                 memset(&tmpl, 0, sizeof(tmpl));
4833                 tmpl.start = bytenr;
4834                 tmpl.nr = 1;
4835                 tmpl.metadata = 1;
4836
4837                 add_extent_rec_nolookup(extent_cache, &tmpl);
4838
4839                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4840                 if (!cache)
4841                         abort();
4842         }
4843
4844         rec = container_of(cache, struct extent_record, cache);
4845         if (rec->start != bytenr) {
4846                 abort();
4847         }
4848
4849         back = find_tree_backref(rec, parent, root);
4850         if (!back) {
4851                 back = alloc_tree_backref(rec, parent, root);
4852                 BUG_ON(!back);
4853         }
4854
4855         if (found_ref) {
4856                 if (back->node.found_ref) {
4857                         fprintf(stderr, "Extent back ref already exists "
4858                                 "for %llu parent %llu root %llu \n",
4859                                 (unsigned long long)bytenr,
4860                                 (unsigned long long)parent,
4861                                 (unsigned long long)root);
4862                 }
4863                 back->node.found_ref = 1;
4864         } else {
4865                 if (back->node.found_extent_tree) {
4866                         fprintf(stderr, "Extent back ref already exists "
4867                                 "for %llu parent %llu root %llu \n",
4868                                 (unsigned long long)bytenr,
4869                                 (unsigned long long)parent,
4870                                 (unsigned long long)root);
4871                 }
4872                 back->node.found_extent_tree = 1;
4873         }
4874         check_extent_type(rec);
4875         maybe_free_extent_rec(extent_cache, rec);
4876         return 0;
4877 }
4878
4879 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4880                             u64 parent, u64 root, u64 owner, u64 offset,
4881                             u32 num_refs, int found_ref, u64 max_size)
4882 {
4883         struct extent_record *rec;
4884         struct data_backref *back;
4885         struct cache_extent *cache;
4886
4887         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4888         if (!cache) {
4889                 struct extent_record tmpl;
4890
4891                 memset(&tmpl, 0, sizeof(tmpl));
4892                 tmpl.start = bytenr;
4893                 tmpl.nr = 1;
4894                 tmpl.max_size = max_size;
4895
4896                 add_extent_rec_nolookup(extent_cache, &tmpl);
4897
4898                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4899                 if (!cache)
4900                         abort();
4901         }
4902
4903         rec = container_of(cache, struct extent_record, cache);
4904         if (rec->max_size < max_size)
4905                 rec->max_size = max_size;
4906
4907         /*
4908          * If found_ref is set then max_size is the real size and must match the
4909          * existing refs.  So if we have already found a ref then we need to
4910          * make sure that this ref matches the existing one, otherwise we need
4911          * to add a new backref so we can notice that the backrefs don't match
4912          * and we need to figure out who is telling the truth.  This is to
4913          * account for that awful fsync bug I introduced where we'd end up with
4914          * a btrfs_file_extent_item that would have its length include multiple
4915          * prealloc extents or point inside of a prealloc extent.
4916          */
4917         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4918                                  bytenr, max_size);
4919         if (!back) {
4920                 back = alloc_data_backref(rec, parent, root, owner, offset,
4921                                           max_size);
4922                 BUG_ON(!back);
4923         }
4924
4925         if (found_ref) {
4926                 BUG_ON(num_refs != 1);
4927                 if (back->node.found_ref)
4928                         BUG_ON(back->bytes != max_size);
4929                 back->node.found_ref = 1;
4930                 back->found_ref += 1;
4931                 back->bytes = max_size;
4932                 back->disk_bytenr = bytenr;
4933                 rec->refs += 1;
4934                 rec->content_checked = 1;
4935                 rec->owner_ref_checked = 1;
4936         } else {
4937                 if (back->node.found_extent_tree) {
4938                         fprintf(stderr, "Extent back ref already exists "
4939                                 "for %llu parent %llu root %llu "
4940                                 "owner %llu offset %llu num_refs %lu\n",
4941                                 (unsigned long long)bytenr,
4942                                 (unsigned long long)parent,
4943                                 (unsigned long long)root,
4944                                 (unsigned long long)owner,
4945                                 (unsigned long long)offset,
4946                                 (unsigned long)num_refs);
4947                 }
4948                 back->num_refs = num_refs;
4949                 back->node.found_extent_tree = 1;
4950         }
4951         maybe_free_extent_rec(extent_cache, rec);
4952         return 0;
4953 }
4954
4955 static int add_pending(struct cache_tree *pending,
4956                        struct cache_tree *seen, u64 bytenr, u32 size)
4957 {
4958         int ret;
4959         ret = add_cache_extent(seen, bytenr, size);
4960         if (ret)
4961                 return ret;
4962         add_cache_extent(pending, bytenr, size);
4963         return 0;
4964 }
4965
4966 static int pick_next_pending(struct cache_tree *pending,
4967                         struct cache_tree *reada,
4968                         struct cache_tree *nodes,
4969                         u64 last, struct block_info *bits, int bits_nr,
4970                         int *reada_bits)
4971 {
4972         unsigned long node_start = last;
4973         struct cache_extent *cache;
4974         int ret;
4975
4976         cache = search_cache_extent(reada, 0);
4977         if (cache) {
4978                 bits[0].start = cache->start;
4979                 bits[0].size = cache->size;
4980                 *reada_bits = 1;
4981                 return 1;
4982         }
4983         *reada_bits = 0;
4984         if (node_start > 32768)
4985                 node_start -= 32768;
4986
4987         cache = search_cache_extent(nodes, node_start);
4988         if (!cache)
4989                 cache = search_cache_extent(nodes, 0);
4990
4991         if (!cache) {
4992                  cache = search_cache_extent(pending, 0);
4993                  if (!cache)
4994                          return 0;
4995                  ret = 0;
4996                  do {
4997                          bits[ret].start = cache->start;
4998                          bits[ret].size = cache->size;
4999                          cache = next_cache_extent(cache);
5000                          ret++;
5001                  } while (cache && ret < bits_nr);
5002                  return ret;
5003         }
5004
5005         ret = 0;
5006         do {
5007                 bits[ret].start = cache->start;
5008                 bits[ret].size = cache->size;
5009                 cache = next_cache_extent(cache);
5010                 ret++;
5011         } while (cache && ret < bits_nr);
5012
5013         if (bits_nr - ret > 8) {
5014                 u64 lookup = bits[0].start + bits[0].size;
5015                 struct cache_extent *next;
5016                 next = search_cache_extent(pending, lookup);
5017                 while(next) {
5018                         if (next->start - lookup > 32768)
5019                                 break;
5020                         bits[ret].start = next->start;
5021                         bits[ret].size = next->size;
5022                         lookup = next->start + next->size;
5023                         ret++;
5024                         if (ret == bits_nr)
5025                                 break;
5026                         next = next_cache_extent(next);
5027                         if (!next)
5028                                 break;
5029                 }
5030         }
5031         return ret;
5032 }
5033
5034 static void free_chunk_record(struct cache_extent *cache)
5035 {
5036         struct chunk_record *rec;
5037
5038         rec = container_of(cache, struct chunk_record, cache);
5039         list_del_init(&rec->list);
5040         list_del_init(&rec->dextents);
5041         free(rec);
5042 }
5043
5044 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5045 {
5046         cache_tree_free_extents(chunk_cache, free_chunk_record);
5047 }
5048
5049 static void free_device_record(struct rb_node *node)
5050 {
5051         struct device_record *rec;
5052
5053         rec = container_of(node, struct device_record, node);
5054         free(rec);
5055 }
5056
/*
 * Instantiate the rb-tree teardown code for the device cache with
 * free_device_record() as the per-node destructor.
 * NOTE(review): the macro is defined outside this file; the name of the
 * generated function is presumably derived from "device_cache" - confirm.
 */
FREE_RB_BASED_TREE(device_cache, free_device_record);
5058
5059 int insert_block_group_record(struct block_group_tree *tree,
5060                               struct block_group_record *bg_rec)
5061 {
5062         int ret;
5063
5064         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5065         if (ret)
5066                 return ret;
5067
5068         list_add_tail(&bg_rec->list, &tree->block_groups);
5069         return 0;
5070 }
5071
5072 static void free_block_group_record(struct cache_extent *cache)
5073 {
5074         struct block_group_record *rec;
5075
5076         rec = container_of(cache, struct block_group_record, cache);
5077         list_del_init(&rec->list);
5078         free(rec);
5079 }
5080
5081 void free_block_group_tree(struct block_group_tree *tree)
5082 {
5083         cache_tree_free_extents(&tree->tree, free_block_group_record);
5084 }
5085
5086 int insert_device_extent_record(struct device_extent_tree *tree,
5087                                 struct device_extent_record *de_rec)
5088 {
5089         int ret;
5090
5091         /*
5092          * Device extent is a bit different from the other extents, because
5093          * the extents which belong to the different devices may have the
5094          * same start and size, so we need use the special extent cache
5095          * search/insert functions.
5096          */
5097         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5098         if (ret)
5099                 return ret;
5100
5101         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5102         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5103         return 0;
5104 }
5105
5106 static void free_device_extent_record(struct cache_extent *cache)
5107 {
5108         struct device_extent_record *rec;
5109
5110         rec = container_of(cache, struct device_extent_record, cache);
5111         if (!list_empty(&rec->chunk_list))
5112                 list_del_init(&rec->chunk_list);
5113         if (!list_empty(&rec->device_list))
5114                 list_del_init(&rec->device_list);
5115         free(rec);
5116 }
5117
5118 void free_device_extent_tree(struct device_extent_tree *tree)
5119 {
5120         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5121 }
5122
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs; all others are recorded as data backrefs carrying the
 * item's ref count.  In both cases key.offset is passed as the parent.
 * Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_key key;
        struct btrfs_extent_ref_v0 *v0_ref;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        v0_ref = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, v0_ref) < BTRFS_FIRST_FREE_OBJECTID)
                add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
        else
                add_data_backref(extent_cache, key.objectid, key.offset, 0,
                                 0, 0, btrfs_ref_count_v0(leaf, v0_ref), 0, 0);
        return 0;
}
#endif
5141
5142 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5143                                             struct btrfs_key *key,
5144                                             int slot)
5145 {
5146         struct btrfs_chunk *ptr;
5147         struct chunk_record *rec;
5148         int num_stripes, i;
5149
5150         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5151         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5152
5153         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5154         if (!rec) {
5155                 fprintf(stderr, "memory allocation failed\n");
5156                 exit(-1);
5157         }
5158
5159         INIT_LIST_HEAD(&rec->list);
5160         INIT_LIST_HEAD(&rec->dextents);
5161         rec->bg_rec = NULL;
5162
5163         rec->cache.start = key->offset;
5164         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5165
5166         rec->generation = btrfs_header_generation(leaf);
5167
5168         rec->objectid = key->objectid;
5169         rec->type = key->type;
5170         rec->offset = key->offset;
5171
5172         rec->length = rec->cache.size;
5173         rec->owner = btrfs_chunk_owner(leaf, ptr);
5174         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5175         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5176         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5177         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5178         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5179         rec->num_stripes = num_stripes;
5180         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5181
5182         for (i = 0; i < rec->num_stripes; ++i) {
5183                 rec->stripes[i].devid =
5184                         btrfs_stripe_devid_nr(leaf, ptr, i);
5185                 rec->stripes[i].offset =
5186                         btrfs_stripe_offset_nr(leaf, ptr, i);
5187                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5188                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5189                                 BTRFS_UUID_SIZE);
5190         }
5191
5192         return rec;
5193 }
5194
5195 static int process_chunk_item(struct cache_tree *chunk_cache,
5196                               struct btrfs_key *key, struct extent_buffer *eb,
5197                               int slot)
5198 {
5199         struct chunk_record *rec;
5200         int ret = 0;
5201
5202         rec = btrfs_new_chunk_record(eb, key, slot);
5203         ret = insert_cache_extent(chunk_cache, &rec->cache);
5204         if (ret) {
5205                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5206                         rec->offset, rec->length);
5207                 free(rec);
5208         }
5209
5210         return ret;
5211 }
5212
5213 static int process_device_item(struct rb_root *dev_cache,
5214                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5215 {
5216         struct btrfs_dev_item *ptr;
5217         struct device_record *rec;
5218         int ret = 0;
5219
5220         ptr = btrfs_item_ptr(eb,
5221                 slot, struct btrfs_dev_item);
5222
5223         rec = malloc(sizeof(*rec));
5224         if (!rec) {
5225                 fprintf(stderr, "memory allocation failed\n");
5226                 return -ENOMEM;
5227         }
5228
5229         rec->devid = key->offset;
5230         rec->generation = btrfs_header_generation(eb);
5231
5232         rec->objectid = key->objectid;
5233         rec->type = key->type;
5234         rec->offset = key->offset;
5235
5236         rec->devid = btrfs_device_id(eb, ptr);
5237         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5238         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5239
5240         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5241         if (ret) {
5242                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5243                 free(rec);
5244         }
5245
5246         return ret;
5247 }
5248
5249 struct block_group_record *
5250 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5251                              int slot)
5252 {
5253         struct btrfs_block_group_item *ptr;
5254         struct block_group_record *rec;
5255
5256         rec = calloc(1, sizeof(*rec));
5257         if (!rec) {
5258                 fprintf(stderr, "memory allocation failed\n");
5259                 exit(-1);
5260         }
5261
5262         rec->cache.start = key->objectid;
5263         rec->cache.size = key->offset;
5264
5265         rec->generation = btrfs_header_generation(leaf);
5266
5267         rec->objectid = key->objectid;
5268         rec->type = key->type;
5269         rec->offset = key->offset;
5270
5271         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5272         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5273
5274         INIT_LIST_HEAD(&rec->list);
5275
5276         return rec;
5277 }
5278
5279 static int process_block_group_item(struct block_group_tree *block_group_cache,
5280                                     struct btrfs_key *key,
5281                                     struct extent_buffer *eb, int slot)
5282 {
5283         struct block_group_record *rec;
5284         int ret = 0;
5285
5286         rec = btrfs_new_block_group_record(eb, key, slot);
5287         ret = insert_block_group_record(block_group_cache, rec);
5288         if (ret) {
5289                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5290                         rec->objectid, rec->offset);
5291                 free(rec);
5292         }
5293
5294         return ret;
5295 }
5296
5297 struct device_extent_record *
5298 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5299                                struct btrfs_key *key, int slot)
5300 {
5301         struct device_extent_record *rec;
5302         struct btrfs_dev_extent *ptr;
5303
5304         rec = calloc(1, sizeof(*rec));
5305         if (!rec) {
5306                 fprintf(stderr, "memory allocation failed\n");
5307                 exit(-1);
5308         }
5309
5310         rec->cache.objectid = key->objectid;
5311         rec->cache.start = key->offset;
5312
5313         rec->generation = btrfs_header_generation(leaf);
5314
5315         rec->objectid = key->objectid;
5316         rec->type = key->type;
5317         rec->offset = key->offset;
5318
5319         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5320         rec->chunk_objecteid =
5321                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5322         rec->chunk_offset =
5323                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5324         rec->length = btrfs_dev_extent_length(leaf, ptr);
5325         rec->cache.size = rec->length;
5326
5327         INIT_LIST_HEAD(&rec->chunk_list);
5328         INIT_LIST_HEAD(&rec->device_list);
5329
5330         return rec;
5331 }
5332
5333 static int
5334 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5335                            struct btrfs_key *key, struct extent_buffer *eb,
5336                            int slot)
5337 {
5338         struct device_extent_record *rec;
5339         int ret;
5340
5341         rec = btrfs_new_device_extent_record(eb, key, slot);
5342         ret = insert_device_extent_record(dev_extent_cache, rec);
5343         if (ret) {
5344                 fprintf(stderr,
5345                         "Device extent[%llu, %llu, %llu] existed.\n",
5346                         rec->objectid, rec->offset, rec->length);
5347                 free(rec);
5348         }
5349
5350         return ret;
5351 }
5352
5353 static int process_extent_item(struct btrfs_root *root,
5354                                struct cache_tree *extent_cache,
5355                                struct extent_buffer *eb, int slot)
5356 {
5357         struct btrfs_extent_item *ei;
5358         struct btrfs_extent_inline_ref *iref;
5359         struct btrfs_extent_data_ref *dref;
5360         struct btrfs_shared_data_ref *sref;
5361         struct btrfs_key key;
5362         struct extent_record tmpl;
5363         unsigned long end;
5364         unsigned long ptr;
5365         int type;
5366         u32 item_size = btrfs_item_size_nr(eb, slot);
5367         u64 refs = 0;
5368         u64 offset;
5369         u64 num_bytes;
5370         int metadata = 0;
5371
5372         btrfs_item_key_to_cpu(eb, &key, slot);
5373
5374         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5375                 metadata = 1;
5376                 num_bytes = root->nodesize;
5377         } else {
5378                 num_bytes = key.offset;
5379         }
5380
5381         if (item_size < sizeof(*ei)) {
5382 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5383                 struct btrfs_extent_item_v0 *ei0;
5384                 BUG_ON(item_size != sizeof(*ei0));
5385                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5386                 refs = btrfs_extent_refs_v0(eb, ei0);
5387 #else
5388                 BUG();
5389 #endif
5390                 memset(&tmpl, 0, sizeof(tmpl));
5391                 tmpl.start = key.objectid;
5392                 tmpl.nr = num_bytes;
5393                 tmpl.extent_item_refs = refs;
5394                 tmpl.metadata = metadata;
5395                 tmpl.found_rec = 1;
5396                 tmpl.max_size = num_bytes;
5397
5398                 return add_extent_rec(extent_cache, &tmpl);
5399         }
5400
5401         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5402         refs = btrfs_extent_refs(eb, ei);
5403         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5404                 metadata = 1;
5405         else
5406                 metadata = 0;
5407
5408         memset(&tmpl, 0, sizeof(tmpl));
5409         tmpl.start = key.objectid;
5410         tmpl.nr = num_bytes;
5411         tmpl.extent_item_refs = refs;
5412         tmpl.metadata = metadata;
5413         tmpl.found_rec = 1;
5414         tmpl.max_size = num_bytes;
5415         add_extent_rec(extent_cache, &tmpl);
5416
5417         ptr = (unsigned long)(ei + 1);
5418         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5419             key.type == BTRFS_EXTENT_ITEM_KEY)
5420                 ptr += sizeof(struct btrfs_tree_block_info);
5421
5422         end = (unsigned long)ei + item_size;
5423         while (ptr < end) {
5424                 iref = (struct btrfs_extent_inline_ref *)ptr;
5425                 type = btrfs_extent_inline_ref_type(eb, iref);
5426                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5427                 switch (type) {
5428                 case BTRFS_TREE_BLOCK_REF_KEY:
5429                         add_tree_backref(extent_cache, key.objectid,
5430                                          0, offset, 0);
5431                         break;
5432                 case BTRFS_SHARED_BLOCK_REF_KEY:
5433                         add_tree_backref(extent_cache, key.objectid,
5434                                          offset, 0, 0);
5435                         break;
5436                 case BTRFS_EXTENT_DATA_REF_KEY:
5437                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5438                         add_data_backref(extent_cache, key.objectid, 0,
5439                                         btrfs_extent_data_ref_root(eb, dref),
5440                                         btrfs_extent_data_ref_objectid(eb,
5441                                                                        dref),
5442                                         btrfs_extent_data_ref_offset(eb, dref),
5443                                         btrfs_extent_data_ref_count(eb, dref),
5444                                         0, num_bytes);
5445                         break;
5446                 case BTRFS_SHARED_DATA_REF_KEY:
5447                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5448                         add_data_backref(extent_cache, key.objectid, offset,
5449                                         0, 0, 0,
5450                                         btrfs_shared_data_ref_count(eb, sref),
5451                                         0, num_bytes);
5452                         break;
5453                 default:
5454                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5455                                 key.objectid, key.type, num_bytes);
5456                         goto out;
5457                 }
5458                 ptr += btrfs_extent_inline_ref_size(type);
5459         }
5460         WARN_ON(ptr > end);
5461 out:
5462         return 0;
5463 }
5464
5465 static int check_cache_range(struct btrfs_root *root,
5466                              struct btrfs_block_group_cache *cache,
5467                              u64 offset, u64 bytes)
5468 {
5469         struct btrfs_free_space *entry;
5470         u64 *logical;
5471         u64 bytenr;
5472         int stripe_len;
5473         int i, nr, ret;
5474
5475         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5476                 bytenr = btrfs_sb_offset(i);
5477                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5478                                        cache->key.objectid, bytenr, 0,
5479                                        &logical, &nr, &stripe_len);
5480                 if (ret)
5481                         return ret;
5482
5483                 while (nr--) {
5484                         if (logical[nr] + stripe_len <= offset)
5485                                 continue;
5486                         if (offset + bytes <= logical[nr])
5487                                 continue;
5488                         if (logical[nr] == offset) {
5489                                 if (stripe_len >= bytes) {
5490                                         kfree(logical);
5491                                         return 0;
5492                                 }
5493                                 bytes -= stripe_len;
5494                                 offset += stripe_len;
5495                         } else if (logical[nr] < offset) {
5496                                 if (logical[nr] + stripe_len >=
5497                                     offset + bytes) {
5498                                         kfree(logical);
5499                                         return 0;
5500                                 }
5501                                 bytes = (offset + bytes) -
5502                                         (logical[nr] + stripe_len);
5503                                 offset = logical[nr] + stripe_len;
5504                         } else {
5505                                 /*
5506                                  * Could be tricky, the super may land in the
5507                                  * middle of the area we're checking.  First
5508                                  * check the easiest case, it's at the end.
5509                                  */
5510                                 if (logical[nr] + stripe_len >=
5511                                     bytes + offset) {
5512                                         bytes = logical[nr] - offset;
5513                                         continue;
5514                                 }
5515
5516                                 /* Check the left side */
5517                                 ret = check_cache_range(root, cache,
5518                                                         offset,
5519                                                         logical[nr] - offset);
5520                                 if (ret) {
5521                                         kfree(logical);
5522                                         return ret;
5523                                 }
5524
5525                                 /* Now we continue with the right side */
5526                                 bytes = (offset + bytes) -
5527                                         (logical[nr] + stripe_len);
5528                                 offset = logical[nr] + stripe_len;
5529                         }
5530                 }
5531
5532                 kfree(logical);
5533         }
5534
5535         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5536         if (!entry) {
5537                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5538                         offset, offset+bytes);
5539                 return -EINVAL;
5540         }
5541
5542         if (entry->offset != offset) {
5543                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5544                         entry->offset);
5545                 return -EINVAL;
5546         }
5547
5548         if (entry->bytes != bytes) {
5549                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5550                         bytes, entry->bytes, offset);
5551                 return -EINVAL;
5552         }
5553
5554         unlink_free_space(cache->free_space_ctl, entry);
5555         free(entry);
5556         return 0;
5557 }
5558
5559 static int verify_space_cache(struct btrfs_root *root,
5560                               struct btrfs_block_group_cache *cache)
5561 {
5562         struct btrfs_path *path;
5563         struct extent_buffer *leaf;
5564         struct btrfs_key key;
5565         u64 last;
5566         int ret = 0;
5567
5568         path = btrfs_alloc_path();
5569         if (!path)
5570                 return -ENOMEM;
5571
5572         root = root->fs_info->extent_root;
5573
5574         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5575
5576         key.objectid = last;
5577         key.offset = 0;
5578         key.type = BTRFS_EXTENT_ITEM_KEY;
5579
5580         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5581         if (ret < 0)
5582                 goto out;
5583         ret = 0;
5584         while (1) {
5585                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5586                         ret = btrfs_next_leaf(root, path);
5587                         if (ret < 0)
5588                                 goto out;
5589                         if (ret > 0) {
5590                                 ret = 0;
5591                                 break;
5592                         }
5593                 }
5594                 leaf = path->nodes[0];
5595                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5596                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5597                         break;
5598                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5599                     key.type != BTRFS_METADATA_ITEM_KEY) {
5600                         path->slots[0]++;
5601                         continue;
5602                 }
5603
5604                 if (last == key.objectid) {
5605                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5606                                 last = key.objectid + key.offset;
5607                         else
5608                                 last = key.objectid + root->nodesize;
5609                         path->slots[0]++;
5610                         continue;
5611                 }
5612
5613                 ret = check_cache_range(root, cache, last,
5614                                         key.objectid - last);
5615                 if (ret)
5616                         break;
5617                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5618                         last = key.objectid + key.offset;
5619                 else
5620                         last = key.objectid + root->nodesize;
5621                 path->slots[0]++;
5622         }
5623
5624         if (last < cache->key.objectid + cache->key.offset)
5625                 ret = check_cache_range(root, cache, last,
5626                                         cache->key.objectid +
5627                                         cache->key.offset - last);
5628
5629 out:
5630         btrfs_free_path(path);
5631
5632         if (!ret &&
5633             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5634                 fprintf(stderr, "There are still entries left in the space "
5635                         "cache\n");
5636                 ret = -EINVAL;
5637         }
5638
5639         return ret;
5640 }
5641
5642 static int check_space_cache(struct btrfs_root *root)
5643 {
5644         struct btrfs_block_group_cache *cache;
5645         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5646         int ret;
5647         int error = 0;
5648
5649         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5650             btrfs_super_generation(root->fs_info->super_copy) !=
5651             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5652                 printf("cache and super generation don't match, space cache "
5653                        "will be invalidated\n");
5654                 return 0;
5655         }
5656
5657         if (ctx.progress_enabled) {
5658                 ctx.tp = TASK_FREE_SPACE;
5659                 task_start(ctx.info);
5660         }
5661
5662         while (1) {
5663                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5664                 if (!cache)
5665                         break;
5666
5667                 start = cache->key.objectid + cache->key.offset;
5668                 if (!cache->free_space_ctl) {
5669                         if (btrfs_init_free_space_ctl(cache,
5670                                                       root->sectorsize)) {
5671                                 ret = -ENOMEM;
5672                                 break;
5673                         }
5674                 } else {
5675                         btrfs_remove_free_space_cache(cache);
5676                 }
5677
5678                 if (btrfs_fs_compat_ro(root->fs_info,
5679                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5680                         ret = exclude_super_stripes(root, cache);
5681                         if (ret) {
5682                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5683                                         strerror(-ret));
5684                                 error++;
5685                                 continue;
5686                         }
5687                         ret = load_free_space_tree(root->fs_info, cache);
5688                         free_excluded_extents(root, cache);
5689                         if (ret < 0) {
5690                                 fprintf(stderr, "could not load free space tree: %s\n",
5691                                         strerror(-ret));
5692                                 error++;
5693                                 continue;
5694                         }
5695                         error += ret;
5696                 } else {
5697                         ret = load_free_space_cache(root->fs_info, cache);
5698                         if (!ret)
5699                                 continue;
5700                 }
5701
5702                 ret = verify_space_cache(root, cache);
5703                 if (ret) {
5704                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5705                                 cache->key.objectid);
5706                         error++;
5707                 }
5708         }
5709
5710         task_stop(ctx.info);
5711
5712         return error ? -EINVAL : 0;
5713 }
5714
5715 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5716                         u64 num_bytes, unsigned long leaf_offset,
5717                         struct extent_buffer *eb) {
5718
5719         u64 offset = 0;
5720         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5721         char *data;
5722         unsigned long csum_offset;
5723         u32 csum;
5724         u32 csum_expected;
5725         u64 read_len;
5726         u64 data_checked = 0;
5727         u64 tmp;
5728         int ret = 0;
5729         int mirror;
5730         int num_copies;
5731
5732         if (num_bytes % root->sectorsize)
5733                 return -EINVAL;
5734
5735         data = malloc(num_bytes);
5736         if (!data)
5737                 return -ENOMEM;
5738
5739         while (offset < num_bytes) {
5740                 mirror = 0;
5741 again:
5742                 read_len = num_bytes - offset;
5743                 /* read as much space once a time */
5744                 ret = read_extent_data(root, data + offset,
5745                                 bytenr + offset, &read_len, mirror);
5746                 if (ret)
5747                         goto out;
5748                 data_checked = 0;
5749                 /* verify every 4k data's checksum */
5750                 while (data_checked < read_len) {
5751                         csum = ~(u32)0;
5752                         tmp = offset + data_checked;
5753
5754                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5755                                                csum, root->sectorsize);
5756                         btrfs_csum_final(csum, (char *)&csum);
5757
5758                         csum_offset = leaf_offset +
5759                                  tmp / root->sectorsize * csum_size;
5760                         read_extent_buffer(eb, (char *)&csum_expected,
5761                                            csum_offset, csum_size);
5762                         /* try another mirror */
5763                         if (csum != csum_expected) {
5764                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5765                                                 mirror, bytenr + tmp,
5766                                                 csum, csum_expected);
5767                                 num_copies = btrfs_num_copies(
5768                                                 &root->fs_info->mapping_tree,
5769                                                 bytenr, num_bytes);
5770                                 if (mirror < num_copies - 1) {
5771                                         mirror += 1;
5772                                         goto again;
5773                                 }
5774                         }
5775                         data_checked += root->sectorsize;
5776                 }
5777                 offset += read_len;
5778         }
5779 out:
5780         free(data);
5781         return ret;
5782 }
5783
5784 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5785                                u64 num_bytes)
5786 {
5787         struct btrfs_path *path;
5788         struct extent_buffer *leaf;
5789         struct btrfs_key key;
5790         int ret;
5791
5792         path = btrfs_alloc_path();
5793         if (!path) {
5794                 fprintf(stderr, "Error allocating path\n");
5795                 return -ENOMEM;
5796         }
5797
5798         key.objectid = bytenr;
5799         key.type = BTRFS_EXTENT_ITEM_KEY;
5800         key.offset = (u64)-1;
5801
5802 again:
5803         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5804                                 0, 0);
5805         if (ret < 0) {
5806                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5807                 btrfs_free_path(path);
5808                 return ret;
5809         } else if (ret) {
5810                 if (path->slots[0] > 0) {
5811                         path->slots[0]--;
5812                 } else {
5813                         ret = btrfs_prev_leaf(root, path);
5814                         if (ret < 0) {
5815                                 goto out;
5816                         } else if (ret > 0) {
5817                                 ret = 0;
5818                                 goto out;
5819                         }
5820                 }
5821         }
5822
5823         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5824
5825         /*
5826          * Block group items come before extent items if they have the same
5827          * bytenr, so walk back one more just in case.  Dear future traveller,
5828          * first congrats on mastering time travel.  Now if it's not too much
5829          * trouble could you go back to 2006 and tell Chris to make the
5830          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5831          * EXTENT_ITEM_KEY please?
5832          */
5833         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5834                 if (path->slots[0] > 0) {
5835                         path->slots[0]--;
5836                 } else {
5837                         ret = btrfs_prev_leaf(root, path);
5838                         if (ret < 0) {
5839                                 goto out;
5840                         } else if (ret > 0) {
5841                                 ret = 0;
5842                                 goto out;
5843                         }
5844                 }
5845                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5846         }
5847
5848         while (num_bytes) {
5849                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5850                         ret = btrfs_next_leaf(root, path);
5851                         if (ret < 0) {
5852                                 fprintf(stderr, "Error going to next leaf "
5853                                         "%d\n", ret);
5854                                 btrfs_free_path(path);
5855                                 return ret;
5856                         } else if (ret) {
5857                                 break;
5858                         }
5859                 }
5860                 leaf = path->nodes[0];
5861                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5862                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5863                         path->slots[0]++;
5864                         continue;
5865                 }
5866                 if (key.objectid + key.offset < bytenr) {
5867                         path->slots[0]++;
5868                         continue;
5869                 }
5870                 if (key.objectid > bytenr + num_bytes)
5871                         break;
5872
5873                 if (key.objectid == bytenr) {
5874                         if (key.offset >= num_bytes) {
5875                                 num_bytes = 0;
5876                                 break;
5877                         }
5878                         num_bytes -= key.offset;
5879                         bytenr += key.offset;
5880                 } else if (key.objectid < bytenr) {
5881                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5882                                 num_bytes = 0;
5883                                 break;
5884                         }
5885                         num_bytes = (bytenr + num_bytes) -
5886                                 (key.objectid + key.offset);
5887                         bytenr = key.objectid + key.offset;
5888                 } else {
5889                         if (key.objectid + key.offset < bytenr + num_bytes) {
5890                                 u64 new_start = key.objectid + key.offset;
5891                                 u64 new_bytes = bytenr + num_bytes - new_start;
5892
5893                                 /*
5894                                  * Weird case, the extent is in the middle of
5895                                  * our range, we'll have to search one side
5896                                  * and then the other.  Not sure if this happens
5897                                  * in real life, but no harm in coding it up
5898                                  * anyway just in case.
5899                                  */
5900                                 btrfs_release_path(path);
5901                                 ret = check_extent_exists(root, new_start,
5902                                                           new_bytes);
5903                                 if (ret) {
5904                                         fprintf(stderr, "Right section didn't "
5905                                                 "have a record\n");
5906                                         break;
5907                                 }
5908                                 num_bytes = key.objectid - bytenr;
5909                                 goto again;
5910                         }
5911                         num_bytes = key.objectid - bytenr;
5912                 }
5913                 path->slots[0]++;
5914         }
5915         ret = 0;
5916
5917 out:
5918         if (num_bytes && !ret) {
5919                 fprintf(stderr, "There are no extents for csum range "
5920                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5921                 ret = 1;
5922         }
5923
5924         btrfs_free_path(path);
5925         return ret;
5926 }
5927
5928 static int check_csums(struct btrfs_root *root)
5929 {
5930         struct btrfs_path *path;
5931         struct extent_buffer *leaf;
5932         struct btrfs_key key;
5933         u64 offset = 0, num_bytes = 0;
5934         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5935         int errors = 0;
5936         int ret;
5937         u64 data_len;
5938         unsigned long leaf_offset;
5939
5940         root = root->fs_info->csum_root;
5941         if (!extent_buffer_uptodate(root->node)) {
5942                 fprintf(stderr, "No valid csum tree found\n");
5943                 return -ENOENT;
5944         }
5945
5946         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5947         key.type = BTRFS_EXTENT_CSUM_KEY;
5948         key.offset = 0;
5949
5950         path = btrfs_alloc_path();
5951         if (!path)
5952                 return -ENOMEM;
5953
5954         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5955         if (ret < 0) {
5956                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5957                 btrfs_free_path(path);
5958                 return ret;
5959         }
5960
5961         if (ret > 0 && path->slots[0])
5962                 path->slots[0]--;
5963         ret = 0;
5964
5965         while (1) {
5966                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5967                         ret = btrfs_next_leaf(root, path);
5968                         if (ret < 0) {
5969                                 fprintf(stderr, "Error going to next leaf "
5970                                         "%d\n", ret);
5971                                 break;
5972                         }
5973                         if (ret)
5974                                 break;
5975                 }
5976                 leaf = path->nodes[0];
5977
5978                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5979                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5980                         path->slots[0]++;
5981                         continue;
5982                 }
5983
5984                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5985                               csum_size) * root->sectorsize;
5986                 if (!check_data_csum)
5987                         goto skip_csum_check;
5988                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5989                 ret = check_extent_csums(root, key.offset, data_len,
5990                                          leaf_offset, leaf);
5991                 if (ret)
5992                         break;
5993 skip_csum_check:
5994                 if (!num_bytes) {
5995                         offset = key.offset;
5996                 } else if (key.offset != offset + num_bytes) {
5997                         ret = check_extent_exists(root, offset, num_bytes);
5998                         if (ret) {
5999                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6000                                         "there is no extent record\n",
6001                                         offset, offset+num_bytes);
6002                                 errors++;
6003                         }
6004                         offset = key.offset;
6005                         num_bytes = 0;
6006                 }
6007                 num_bytes += data_len;
6008                 path->slots[0]++;
6009         }
6010
6011         btrfs_free_path(path);
6012         return errors;
6013 }
6014
6015 static int is_dropped_key(struct btrfs_key *key,
6016                           struct btrfs_key *drop_key) {
6017         if (key->objectid < drop_key->objectid)
6018                 return 1;
6019         else if (key->objectid == drop_key->objectid) {
6020                 if (key->type < drop_key->type)
6021                         return 1;
6022                 else if (key->type == drop_key->type) {
6023                         if (key->offset < drop_key->offset)
6024                                 return 1;
6025                 }
6026         }
6027         return 0;
6028 }
6029
6030 /*
6031  * Here are the rules for FULL_BACKREF.
6032  *
6033  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6034  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6035  *      FULL_BACKREF set.
6036  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6037  *    if it happened after the relocation occurred since we'll have dropped the
6038  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6039  *    have no real way to know for sure.
6040  *
6041  * We process the blocks one root at a time, and we start from the lowest root
6042  * objectid and go to the highest.  So we can just lookup the owner backref for
6043  * the record and if we don't find it then we know it doesn't exist and we have
6044  * a FULL BACKREF.
6045  *
6046  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6047  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6048  * be set or not and then we can check later once we've gathered all the refs.
6049  */
6050 static int calc_extent_flag(struct btrfs_root *root,
6051                            struct cache_tree *extent_cache,
6052                            struct extent_buffer *buf,
6053                            struct root_item_record *ri,
6054                            u64 *flags)
6055 {
6056         struct extent_record *rec;
6057         struct cache_extent *cache;
6058         struct tree_backref *tback;
6059         u64 owner = 0;
6060
6061         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6062         /* we have added this extent before */
6063         BUG_ON(!cache);
6064         rec = container_of(cache, struct extent_record, cache);
6065
6066         /*
6067          * Except file/reloc tree, we can not have
6068          * FULL BACKREF MODE
6069          */
6070         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6071                 goto normal;
6072         /*
6073          * root node
6074          */
6075         if (buf->start == ri->bytenr)
6076                 goto normal;
6077
6078         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6079                 goto full_backref;
6080
6081         owner = btrfs_header_owner(buf);
6082         if (owner == ri->objectid)
6083                 goto normal;
6084
6085         tback = find_tree_backref(rec, 0, owner);
6086         if (!tback)
6087                 goto full_backref;
6088 normal:
6089         *flags = 0;
6090         if (rec->flag_block_full_backref != FLAG_UNSET &&
6091             rec->flag_block_full_backref != 0)
6092                 rec->bad_full_backref = 1;
6093         return 0;
6094 full_backref:
6095         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6096         if (rec->flag_block_full_backref != FLAG_UNSET &&
6097             rec->flag_block_full_backref != 1)
6098                 rec->bad_full_backref = 1;
6099         return 0;
6100 }
6101
6102 static int run_next_block(struct btrfs_root *root,
6103                           struct block_info *bits,
6104                           int bits_nr,
6105                           u64 *last,
6106                           struct cache_tree *pending,
6107                           struct cache_tree *seen,
6108                           struct cache_tree *reada,
6109                           struct cache_tree *nodes,
6110                           struct cache_tree *extent_cache,
6111                           struct cache_tree *chunk_cache,
6112                           struct rb_root *dev_cache,
6113                           struct block_group_tree *block_group_cache,
6114                           struct device_extent_tree *dev_extent_cache,
6115                           struct root_item_record *ri)
6116 {
6117         struct extent_buffer *buf;
6118         struct extent_record *rec = NULL;
6119         u64 bytenr;
6120         u32 size;
6121         u64 parent;
6122         u64 owner;
6123         u64 flags;
6124         u64 ptr;
6125         u64 gen = 0;
6126         int ret = 0;
6127         int i;
6128         int nritems;
6129         struct btrfs_key key;
6130         struct cache_extent *cache;
6131         int reada_bits;
6132
6133         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6134                                     bits_nr, &reada_bits);
6135         if (nritems == 0)
6136                 return 1;
6137
6138         if (!reada_bits) {
6139                 for(i = 0; i < nritems; i++) {
6140                         ret = add_cache_extent(reada, bits[i].start,
6141                                                bits[i].size);
6142                         if (ret == -EEXIST)
6143                                 continue;
6144
6145                         /* fixme, get the parent transid */
6146                         readahead_tree_block(root, bits[i].start,
6147                                              bits[i].size, 0);
6148                 }
6149         }
6150         *last = bits[0].start;
6151         bytenr = bits[0].start;
6152         size = bits[0].size;
6153
6154         cache = lookup_cache_extent(pending, bytenr, size);
6155         if (cache) {
6156                 remove_cache_extent(pending, cache);
6157                 free(cache);
6158         }
6159         cache = lookup_cache_extent(reada, bytenr, size);
6160         if (cache) {
6161                 remove_cache_extent(reada, cache);
6162                 free(cache);
6163         }
6164         cache = lookup_cache_extent(nodes, bytenr, size);
6165         if (cache) {
6166                 remove_cache_extent(nodes, cache);
6167                 free(cache);
6168         }
6169         cache = lookup_cache_extent(extent_cache, bytenr, size);
6170         if (cache) {
6171                 rec = container_of(cache, struct extent_record, cache);
6172                 gen = rec->parent_generation;
6173         }
6174
6175         /* fixme, get the real parent transid */
6176         buf = read_tree_block(root, bytenr, size, gen);
6177         if (!extent_buffer_uptodate(buf)) {
6178                 record_bad_block_io(root->fs_info,
6179                                     extent_cache, bytenr, size);
6180                 goto out;
6181         }
6182
6183         nritems = btrfs_header_nritems(buf);
6184
6185         flags = 0;
6186         if (!init_extent_tree) {
6187                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6188                                        btrfs_header_level(buf), 1, NULL,
6189                                        &flags);
6190                 if (ret < 0) {
6191                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6192                         if (ret < 0) {
6193                                 fprintf(stderr, "Couldn't calc extent flags\n");
6194                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6195                         }
6196                 }
6197         } else {
6198                 flags = 0;
6199                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6200                 if (ret < 0) {
6201                         fprintf(stderr, "Couldn't calc extent flags\n");
6202                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6203                 }
6204         }
6205
6206         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6207                 if (ri != NULL &&
6208                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6209                     ri->objectid == btrfs_header_owner(buf)) {
6210                         /*
6211                          * Ok we got to this block from it's original owner and
6212                          * we have FULL_BACKREF set.  Relocation can leave
6213                          * converted blocks over so this is altogether possible,
6214                          * however it's not possible if the generation > the
6215                          * last snapshot, so check for this case.
6216                          */
6217                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6218                             btrfs_header_generation(buf) > ri->last_snapshot) {
6219                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6220                                 rec->bad_full_backref = 1;
6221                         }
6222                 }
6223         } else {
6224                 if (ri != NULL &&
6225                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6226                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6227                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6228                         rec->bad_full_backref = 1;
6229                 }
6230         }
6231
6232         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6233                 rec->flag_block_full_backref = 1;
6234                 parent = bytenr;
6235                 owner = 0;
6236         } else {
6237                 rec->flag_block_full_backref = 0;
6238                 parent = 0;
6239                 owner = btrfs_header_owner(buf);
6240         }
6241
6242         ret = check_block(root, extent_cache, buf, flags);
6243         if (ret)
6244                 goto out;
6245
6246         if (btrfs_is_leaf(buf)) {
6247                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6248                 for (i = 0; i < nritems; i++) {
6249                         struct btrfs_file_extent_item *fi;
6250                         btrfs_item_key_to_cpu(buf, &key, i);
6251                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6252                                 process_extent_item(root, extent_cache, buf,
6253                                                     i);
6254                                 continue;
6255                         }
6256                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6257                                 process_extent_item(root, extent_cache, buf,
6258                                                     i);
6259                                 continue;
6260                         }
6261                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6262                                 total_csum_bytes +=
6263                                         btrfs_item_size_nr(buf, i);
6264                                 continue;
6265                         }
6266                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6267                                 process_chunk_item(chunk_cache, &key, buf, i);
6268                                 continue;
6269                         }
6270                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6271                                 process_device_item(dev_cache, &key, buf, i);
6272                                 continue;
6273                         }
6274                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6275                                 process_block_group_item(block_group_cache,
6276                                         &key, buf, i);
6277                                 continue;
6278                         }
6279                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6280                                 process_device_extent_item(dev_extent_cache,
6281                                         &key, buf, i);
6282                                 continue;
6283
6284                         }
6285                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6286 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6287                                 process_extent_ref_v0(extent_cache, buf, i);
6288 #else
6289                                 BUG();
6290 #endif
6291                                 continue;
6292                         }
6293
6294                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6295                                 add_tree_backref(extent_cache, key.objectid, 0,
6296                                                  key.offset, 0);
6297                                 continue;
6298                         }
6299                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6300                                 add_tree_backref(extent_cache, key.objectid,
6301                                                  key.offset, 0, 0);
6302                                 continue;
6303                         }
6304                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6305                                 struct btrfs_extent_data_ref *ref;
6306                                 ref = btrfs_item_ptr(buf, i,
6307                                                 struct btrfs_extent_data_ref);
6308                                 add_data_backref(extent_cache,
6309                                         key.objectid, 0,
6310                                         btrfs_extent_data_ref_root(buf, ref),
6311                                         btrfs_extent_data_ref_objectid(buf,
6312                                                                        ref),
6313                                         btrfs_extent_data_ref_offset(buf, ref),
6314                                         btrfs_extent_data_ref_count(buf, ref),
6315                                         0, root->sectorsize);
6316                                 continue;
6317                         }
6318                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6319                                 struct btrfs_shared_data_ref *ref;
6320                                 ref = btrfs_item_ptr(buf, i,
6321                                                 struct btrfs_shared_data_ref);
6322                                 add_data_backref(extent_cache,
6323                                         key.objectid, key.offset, 0, 0, 0,
6324                                         btrfs_shared_data_ref_count(buf, ref),
6325                                         0, root->sectorsize);
6326                                 continue;
6327                         }
6328                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6329                                 struct bad_item *bad;
6330
6331                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6332                                         continue;
6333                                 if (!owner)
6334                                         continue;
6335                                 bad = malloc(sizeof(struct bad_item));
6336                                 if (!bad)
6337                                         continue;
6338                                 INIT_LIST_HEAD(&bad->list);
6339                                 memcpy(&bad->key, &key,
6340                                        sizeof(struct btrfs_key));
6341                                 bad->root_id = owner;
6342                                 list_add_tail(&bad->list, &delete_items);
6343                                 continue;
6344                         }
6345                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6346                                 continue;
6347                         fi = btrfs_item_ptr(buf, i,
6348                                             struct btrfs_file_extent_item);
6349                         if (btrfs_file_extent_type(buf, fi) ==
6350                             BTRFS_FILE_EXTENT_INLINE)
6351                                 continue;
6352                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6353                                 continue;
6354
6355                         data_bytes_allocated +=
6356                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6357                         if (data_bytes_allocated < root->sectorsize) {
6358                                 abort();
6359                         }
6360                         data_bytes_referenced +=
6361                                 btrfs_file_extent_num_bytes(buf, fi);
6362                         add_data_backref(extent_cache,
6363                                 btrfs_file_extent_disk_bytenr(buf, fi),
6364                                 parent, owner, key.objectid, key.offset -
6365                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6366                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6367                 }
6368         } else {
6369                 int level;
6370                 struct btrfs_key first_key;
6371
6372                 first_key.objectid = 0;
6373
6374                 if (nritems > 0)
6375                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6376                 level = btrfs_header_level(buf);
6377                 for (i = 0; i < nritems; i++) {
6378                         struct extent_record tmpl;
6379
6380                         ptr = btrfs_node_blockptr(buf, i);
6381                         size = root->nodesize;
6382                         btrfs_node_key_to_cpu(buf, &key, i);
6383                         if (ri != NULL) {
6384                                 if ((level == ri->drop_level)
6385                                     && is_dropped_key(&key, &ri->drop_key)) {
6386                                         continue;
6387                                 }
6388                         }
6389
6390                         memset(&tmpl, 0, sizeof(tmpl));
6391                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6392                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6393                         tmpl.start = ptr;
6394                         tmpl.nr = size;
6395                         tmpl.refs = 1;
6396                         tmpl.metadata = 1;
6397                         tmpl.max_size = size;
6398                         ret = add_extent_rec(extent_cache, &tmpl);
6399                         BUG_ON(ret);
6400
6401                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6402
6403                         if (level > 1) {
6404                                 add_pending(nodes, seen, ptr, size);
6405                         } else {
6406                                 add_pending(pending, seen, ptr, size);
6407                         }
6408                 }
6409                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6410                                       nritems) * sizeof(struct btrfs_key_ptr);
6411         }
6412         total_btree_bytes += buf->len;
6413         if (fs_root_objectid(btrfs_header_owner(buf)))
6414                 total_fs_tree_bytes += buf->len;
6415         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6416                 total_extent_tree_bytes += buf->len;
6417         if (!found_old_backref &&
6418             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6419             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6420             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6421                 found_old_backref = 1;
6422 out:
6423         free_extent_buffer(buf);
6424         return ret;
6425 }
6426
6427 static int add_root_to_pending(struct extent_buffer *buf,
6428                                struct cache_tree *extent_cache,
6429                                struct cache_tree *pending,
6430                                struct cache_tree *seen,
6431                                struct cache_tree *nodes,
6432                                u64 objectid)
6433 {
6434         struct extent_record tmpl;
6435
6436         if (btrfs_header_level(buf) > 0)
6437                 add_pending(nodes, seen, buf->start, buf->len);
6438         else
6439                 add_pending(pending, seen, buf->start, buf->len);
6440
6441         memset(&tmpl, 0, sizeof(tmpl));
6442         tmpl.start = buf->start;
6443         tmpl.nr = buf->len;
6444         tmpl.is_root = 1;
6445         tmpl.refs = 1;
6446         tmpl.metadata = 1;
6447         tmpl.max_size = buf->len;
6448         add_extent_rec(extent_cache, &tmpl);
6449
6450         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6451             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6452                 add_tree_backref(extent_cache, buf->start, buf->start,
6453                                  0, 1);
6454         else
6455                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6456         return 0;
6457 }
6458
6459 /* as we fix the tree, we might be deleting blocks that
6460  * we're tracking for repair.  This hook makes sure we
6461  * remove any backrefs for blocks as we are fixing them.
6462  */
6463 static int free_extent_hook(struct btrfs_trans_handle *trans,
6464                             struct btrfs_root *root,
6465                             u64 bytenr, u64 num_bytes, u64 parent,
6466                             u64 root_objectid, u64 owner, u64 offset,
6467                             int refs_to_drop)
6468 {
6469         struct extent_record *rec;
6470         struct cache_extent *cache;
6471         int is_data;
6472         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6473
6474         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6475         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6476         if (!cache)
6477                 return 0;
6478
6479         rec = container_of(cache, struct extent_record, cache);
6480         if (is_data) {
6481                 struct data_backref *back;
6482                 back = find_data_backref(rec, parent, root_objectid, owner,
6483                                          offset, 1, bytenr, num_bytes);
6484                 if (!back)
6485                         goto out;
6486                 if (back->node.found_ref) {
6487                         back->found_ref -= refs_to_drop;
6488                         if (rec->refs)
6489                                 rec->refs -= refs_to_drop;
6490                 }
6491                 if (back->node.found_extent_tree) {
6492                         back->num_refs -= refs_to_drop;
6493                         if (rec->extent_item_refs)
6494                                 rec->extent_item_refs -= refs_to_drop;
6495                 }
6496                 if (back->found_ref == 0)
6497                         back->node.found_ref = 0;
6498                 if (back->num_refs == 0)
6499                         back->node.found_extent_tree = 0;
6500
6501                 if (!back->node.found_extent_tree && back->node.found_ref) {
6502                         rb_erase(&back->node.node, &rec->backref_tree);
6503                         free(back);
6504                 }
6505         } else {
6506                 struct tree_backref *back;
6507                 back = find_tree_backref(rec, parent, root_objectid);
6508                 if (!back)
6509                         goto out;
6510                 if (back->node.found_ref) {
6511                         if (rec->refs)
6512                                 rec->refs--;
6513                         back->node.found_ref = 0;
6514                 }
6515                 if (back->node.found_extent_tree) {
6516                         if (rec->extent_item_refs)
6517                                 rec->extent_item_refs--;
6518                         back->node.found_extent_tree = 0;
6519                 }
6520                 if (!back->node.found_extent_tree && back->node.found_ref) {
6521                         rb_erase(&back->node.node, &rec->backref_tree);
6522                         free(back);
6523                 }
6524         }
6525         maybe_free_extent_rec(extent_cache, rec);
6526 out:
6527         return 0;
6528 }
6529
6530 static int delete_extent_records(struct btrfs_trans_handle *trans,
6531                                  struct btrfs_root *root,
6532                                  struct btrfs_path *path,
6533                                  u64 bytenr, u64 new_len)
6534 {
6535         struct btrfs_key key;
6536         struct btrfs_key found_key;
6537         struct extent_buffer *leaf;
6538         int ret;
6539         int slot;
6540
6541
6542         key.objectid = bytenr;
6543         key.type = (u8)-1;
6544         key.offset = (u64)-1;
6545
6546         while(1) {
6547                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6548                                         &key, path, 0, 1);
6549                 if (ret < 0)
6550                         break;
6551
6552                 if (ret > 0) {
6553                         ret = 0;
6554                         if (path->slots[0] == 0)
6555                                 break;
6556                         path->slots[0]--;
6557                 }
6558                 ret = 0;
6559
6560                 leaf = path->nodes[0];
6561                 slot = path->slots[0];
6562
6563                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6564                 if (found_key.objectid != bytenr)
6565                         break;
6566
6567                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6568                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6569                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6570                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6571                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6572                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6573                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6574                         btrfs_release_path(path);
6575                         if (found_key.type == 0) {
6576                                 if (found_key.offset == 0)
6577                                         break;
6578                                 key.offset = found_key.offset - 1;
6579                                 key.type = found_key.type;
6580                         }
6581                         key.type = found_key.type - 1;
6582                         key.offset = (u64)-1;
6583                         continue;
6584                 }
6585
6586                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6587                         found_key.objectid, found_key.type, found_key.offset);
6588
6589                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6590                 if (ret)
6591                         break;
6592                 btrfs_release_path(path);
6593
6594                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6595                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6596                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6597                                 found_key.offset : root->nodesize;
6598
6599                         ret = btrfs_update_block_group(trans, root, bytenr,
6600                                                        bytes, 0, 0);
6601                         if (ret)
6602                                 break;
6603                 }
6604         }
6605
6606         btrfs_release_path(path);
6607         return ret;
6608 }
6609
6610 /*
6611  * for a single backref, this will allocate a new extent
6612  * and add the backref to it.
6613  */
6614 static int record_extent(struct btrfs_trans_handle *trans,
6615                          struct btrfs_fs_info *info,
6616                          struct btrfs_path *path,
6617                          struct extent_record *rec,
6618                          struct extent_backref *back,
6619                          int allocated, u64 flags)
6620 {
6621         int ret;
6622         struct btrfs_root *extent_root = info->extent_root;
6623         struct extent_buffer *leaf;
6624         struct btrfs_key ins_key;
6625         struct btrfs_extent_item *ei;
6626         struct tree_backref *tback;
6627         struct data_backref *dback;
6628         struct btrfs_tree_block_info *bi;
6629
6630         if (!back->is_data)
6631                 rec->max_size = max_t(u64, rec->max_size,
6632                                     info->extent_root->nodesize);
6633
6634         if (!allocated) {
6635                 u32 item_size = sizeof(*ei);
6636
6637                 if (!back->is_data)
6638                         item_size += sizeof(*bi);
6639
6640                 ins_key.objectid = rec->start;
6641                 ins_key.offset = rec->max_size;
6642                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6643
6644                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6645                                         &ins_key, item_size);
6646                 if (ret)
6647                         goto fail;
6648
6649                 leaf = path->nodes[0];
6650                 ei = btrfs_item_ptr(leaf, path->slots[0],
6651                                     struct btrfs_extent_item);
6652
6653                 btrfs_set_extent_refs(leaf, ei, 0);
6654                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6655
6656                 if (back->is_data) {
6657                         btrfs_set_extent_flags(leaf, ei,
6658                                                BTRFS_EXTENT_FLAG_DATA);
6659                 } else {
6660                         struct btrfs_disk_key copy_key;;
6661
6662                         tback = to_tree_backref(back);
6663                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6664                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6665                                              sizeof(*bi));
6666
6667                         btrfs_set_disk_key_objectid(&copy_key,
6668                                                     rec->info_objectid);
6669                         btrfs_set_disk_key_type(&copy_key, 0);
6670                         btrfs_set_disk_key_offset(&copy_key, 0);
6671
6672                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6673                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6674
6675                         btrfs_set_extent_flags(leaf, ei,
6676                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6677                 }
6678
6679                 btrfs_mark_buffer_dirty(leaf);
6680                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6681                                                rec->max_size, 1, 0);
6682                 if (ret)
6683                         goto fail;
6684                 btrfs_release_path(path);
6685         }
6686
6687         if (back->is_data) {
6688                 u64 parent;
6689                 int i;
6690
6691                 dback = to_data_backref(back);
6692                 if (back->full_backref)
6693                         parent = dback->parent;
6694                 else
6695                         parent = 0;
6696
6697                 for (i = 0; i < dback->found_ref; i++) {
6698                         /* if parent != 0, we're doing a full backref
6699                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6700                          * just makes the backref allocator create a data
6701                          * backref
6702                          */
6703                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6704                                                    rec->start, rec->max_size,
6705                                                    parent,
6706                                                    dback->root,
6707                                                    parent ?
6708                                                    BTRFS_FIRST_FREE_OBJECTID :
6709                                                    dback->owner,
6710                                                    dback->offset);
6711                         if (ret)
6712                                 break;
6713                 }
6714                 fprintf(stderr, "adding new data backref"
6715                                 " on %llu %s %llu owner %llu"
6716                                 " offset %llu found %d\n",
6717                                 (unsigned long long)rec->start,
6718                                 back->full_backref ?
6719                                 "parent" : "root",
6720                                 back->full_backref ?
6721                                 (unsigned long long)parent :
6722                                 (unsigned long long)dback->root,
6723                                 (unsigned long long)dback->owner,
6724                                 (unsigned long long)dback->offset,
6725                                 dback->found_ref);
6726         } else {
6727                 u64 parent;
6728
6729                 tback = to_tree_backref(back);
6730                 if (back->full_backref)
6731                         parent = tback->parent;
6732                 else
6733                         parent = 0;
6734
6735                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6736                                            rec->start, rec->max_size,
6737                                            parent, tback->root, 0, 0);
6738                 fprintf(stderr, "adding new tree backref on "
6739                         "start %llu len %llu parent %llu root %llu\n",
6740                         rec->start, rec->max_size, parent, tback->root);
6741         }
6742 fail:
6743         btrfs_release_path(path);
6744         return ret;
6745 }
6746
6747 static struct extent_entry *find_entry(struct list_head *entries,
6748                                        u64 bytenr, u64 bytes)
6749 {
6750         struct extent_entry *entry = NULL;
6751
6752         list_for_each_entry(entry, entries, list) {
6753                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6754                         return entry;
6755         }
6756
6757         return NULL;
6758 }
6759
6760 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6761 {
6762         struct extent_entry *entry, *best = NULL, *prev = NULL;
6763
6764         list_for_each_entry(entry, entries, list) {
6765                 if (!prev) {
6766                         prev = entry;
6767                         continue;
6768                 }
6769
6770                 /*
6771                  * If there are as many broken entries as entries then we know
6772                  * not to trust this particular entry.
6773                  */
6774                 if (entry->broken == entry->count)
6775                         continue;
6776
6777                 /*
6778                  * If our current entry == best then we can't be sure our best
6779                  * is really the best, so we need to keep searching.
6780                  */
6781                 if (best && best->count == entry->count) {
6782                         prev = entry;
6783                         best = NULL;
6784                         continue;
6785                 }
6786
6787                 /* Prev == entry, not good enough, have to keep searching */
6788                 if (!prev->broken && prev->count == entry->count)
6789                         continue;
6790
6791                 if (!best)
6792                         best = (prev->count > entry->count) ? prev : entry;
6793                 else if (best->count < entry->count)
6794                         best = entry;
6795                 prev = entry;
6796         }
6797
6798         return best;
6799 }
6800
6801 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6802                       struct data_backref *dback, struct extent_entry *entry)
6803 {
6804         struct btrfs_trans_handle *trans;
6805         struct btrfs_root *root;
6806         struct btrfs_file_extent_item *fi;
6807         struct extent_buffer *leaf;
6808         struct btrfs_key key;
6809         u64 bytenr, bytes;
6810         int ret, err;
6811
6812         key.objectid = dback->root;
6813         key.type = BTRFS_ROOT_ITEM_KEY;
6814         key.offset = (u64)-1;
6815         root = btrfs_read_fs_root(info, &key);
6816         if (IS_ERR(root)) {
6817                 fprintf(stderr, "Couldn't find root for our ref\n");
6818                 return -EINVAL;
6819         }
6820
6821         /*
6822          * The backref points to the original offset of the extent if it was
6823          * split, so we need to search down to the offset we have and then walk
6824          * forward until we find the backref we're looking for.
6825          */
6826         key.objectid = dback->owner;
6827         key.type = BTRFS_EXTENT_DATA_KEY;
6828         key.offset = dback->offset;
6829         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6830         if (ret < 0) {
6831                 fprintf(stderr, "Error looking up ref %d\n", ret);
6832                 return ret;
6833         }
6834
6835         while (1) {
6836                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6837                         ret = btrfs_next_leaf(root, path);
6838                         if (ret) {
6839                                 fprintf(stderr, "Couldn't find our ref, next\n");
6840                                 return -EINVAL;
6841                         }
6842                 }
6843                 leaf = path->nodes[0];
6844                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6845                 if (key.objectid != dback->owner ||
6846                     key.type != BTRFS_EXTENT_DATA_KEY) {
6847                         fprintf(stderr, "Couldn't find our ref, search\n");
6848                         return -EINVAL;
6849                 }
6850                 fi = btrfs_item_ptr(leaf, path->slots[0],
6851                                     struct btrfs_file_extent_item);
6852                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6853                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6854
6855                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6856                         break;
6857                 path->slots[0]++;
6858         }
6859
6860         btrfs_release_path(path);
6861
6862         trans = btrfs_start_transaction(root, 1);
6863         if (IS_ERR(trans))
6864                 return PTR_ERR(trans);
6865
6866         /*
6867          * Ok we have the key of the file extent we want to fix, now we can cow
6868          * down to the thing and fix it.
6869          */
6870         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6871         if (ret < 0) {
6872                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6873                         key.objectid, key.type, key.offset, ret);
6874                 goto out;
6875         }
6876         if (ret > 0) {
6877                 fprintf(stderr, "Well that's odd, we just found this key "
6878                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6879                         key.offset);
6880                 ret = -EINVAL;
6881                 goto out;
6882         }
6883         leaf = path->nodes[0];
6884         fi = btrfs_item_ptr(leaf, path->slots[0],
6885                             struct btrfs_file_extent_item);
6886
6887         if (btrfs_file_extent_compression(leaf, fi) &&
6888             dback->disk_bytenr != entry->bytenr) {
6889                 fprintf(stderr, "Ref doesn't match the record start and is "
6890                         "compressed, please take a btrfs-image of this file "
6891                         "system and send it to a btrfs developer so they can "
6892                         "complete this functionality for bytenr %Lu\n",
6893                         dback->disk_bytenr);
6894                 ret = -EINVAL;
6895                 goto out;
6896         }
6897
6898         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6899                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6900         } else if (dback->disk_bytenr > entry->bytenr) {
6901                 u64 off_diff, offset;
6902
6903                 off_diff = dback->disk_bytenr - entry->bytenr;
6904                 offset = btrfs_file_extent_offset(leaf, fi);
6905                 if (dback->disk_bytenr + offset +
6906                     btrfs_file_extent_num_bytes(leaf, fi) >
6907                     entry->bytenr + entry->bytes) {
6908                         fprintf(stderr, "Ref is past the entry end, please "
6909                                 "take a btrfs-image of this file system and "
6910                                 "send it to a btrfs developer, ref %Lu\n",
6911                                 dback->disk_bytenr);
6912                         ret = -EINVAL;
6913                         goto out;
6914                 }
6915                 offset += off_diff;
6916                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6917                 btrfs_set_file_extent_offset(leaf, fi, offset);
6918         } else if (dback->disk_bytenr < entry->bytenr) {
6919                 u64 offset;
6920
6921                 offset = btrfs_file_extent_offset(leaf, fi);
6922                 if (dback->disk_bytenr + offset < entry->bytenr) {
6923                         fprintf(stderr, "Ref is before the entry start, please"
6924                                 " take a btrfs-image of this file system and "
6925                                 "send it to a btrfs developer, ref %Lu\n",
6926                                 dback->disk_bytenr);
6927                         ret = -EINVAL;
6928                         goto out;
6929                 }
6930
6931                 offset += dback->disk_bytenr;
6932                 offset -= entry->bytenr;
6933                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6934                 btrfs_set_file_extent_offset(leaf, fi, offset);
6935         }
6936
6937         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6938
6939         /*
6940          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6941          * only do this if we aren't using compression, otherwise it's a
6942          * trickier case.
6943          */
6944         if (!btrfs_file_extent_compression(leaf, fi))
6945                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6946         else
6947                 printf("ram bytes may be wrong?\n");
6948         btrfs_mark_buffer_dirty(leaf);
6949 out:
6950         err = btrfs_commit_transaction(trans, root);
6951         btrfs_release_path(path);
6952         return ret ? ret : err;
6953 }
6954
6955 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6956                            struct extent_record *rec)
6957 {
6958         struct extent_backref *back, *tmp;
6959         struct data_backref *dback;
6960         struct extent_entry *entry, *best = NULL;
6961         LIST_HEAD(entries);
6962         int nr_entries = 0;
6963         int broken_entries = 0;
6964         int ret = 0;
6965         short mismatch = 0;
6966
6967         /*
6968          * Metadata is easy and the backrefs should always agree on bytenr and
6969          * size, if not we've got bigger issues.
6970          */
6971         if (rec->metadata)
6972                 return 0;
6973
6974         rbtree_postorder_for_each_entry_safe(back, tmp,
6975                                              &rec->backref_tree, node) {
6976                 if (back->full_backref || !back->is_data)
6977                         continue;
6978
6979                 dback = to_data_backref(back);
6980
6981                 /*
6982                  * We only pay attention to backrefs that we found a real
6983                  * backref for.
6984                  */
6985                 if (dback->found_ref == 0)
6986                         continue;
6987
6988                 /*
6989                  * For now we only catch when the bytes don't match, not the
6990                  * bytenr.  We can easily do this at the same time, but I want
6991                  * to have a fs image to test on before we just add repair
6992                  * functionality willy-nilly so we know we won't screw up the
6993                  * repair.
6994                  */
6995
6996                 entry = find_entry(&entries, dback->disk_bytenr,
6997                                    dback->bytes);
6998                 if (!entry) {
6999                         entry = malloc(sizeof(struct extent_entry));
7000                         if (!entry) {
7001                                 ret = -ENOMEM;
7002                                 goto out;
7003                         }
7004                         memset(entry, 0, sizeof(*entry));
7005                         entry->bytenr = dback->disk_bytenr;
7006                         entry->bytes = dback->bytes;
7007                         list_add_tail(&entry->list, &entries);
7008                         nr_entries++;
7009                 }
7010
7011                 /*
7012                  * If we only have on entry we may think the entries agree when
7013                  * in reality they don't so we have to do some extra checking.
7014                  */
7015                 if (dback->disk_bytenr != rec->start ||
7016                     dback->bytes != rec->nr || back->broken)
7017                         mismatch = 1;
7018
7019                 if (back->broken) {
7020                         entry->broken++;
7021                         broken_entries++;
7022                 }
7023
7024                 entry->count++;
7025         }
7026
7027         /* Yay all the backrefs agree, carry on good sir */
7028         if (nr_entries <= 1 && !mismatch)
7029                 goto out;
7030
7031         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7032                 "%Lu\n", rec->start);
7033
7034         /*
7035          * First we want to see if the backrefs can agree amongst themselves who
7036          * is right, so figure out which one of the entries has the highest
7037          * count.
7038          */
7039         best = find_most_right_entry(&entries);
7040
7041         /*
7042          * Ok so we may have an even split between what the backrefs think, so
7043          * this is where we use the extent ref to see what it thinks.
7044          */
7045         if (!best) {
7046                 entry = find_entry(&entries, rec->start, rec->nr);
7047                 if (!entry && (!broken_entries || !rec->found_rec)) {
7048                         fprintf(stderr, "Backrefs don't agree with each other "
7049                                 "and extent record doesn't agree with anybody,"
7050                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7051                                 rec->start, rec->nr);
7052                         ret = -EINVAL;
7053                         goto out;
7054                 } else if (!entry) {
7055                         /*
7056                          * Ok our backrefs were broken, we'll assume this is the
7057                          * correct value and add an entry for this range.
7058                          */
7059                         entry = malloc(sizeof(struct extent_entry));
7060                         if (!entry) {
7061                                 ret = -ENOMEM;
7062                                 goto out;
7063                         }
7064                         memset(entry, 0, sizeof(*entry));
7065                         entry->bytenr = rec->start;
7066                         entry->bytes = rec->nr;
7067                         list_add_tail(&entry->list, &entries);
7068                         nr_entries++;
7069                 }
7070                 entry->count++;
7071                 best = find_most_right_entry(&entries);
7072                 if (!best) {
7073                         fprintf(stderr, "Backrefs and extent record evenly "
7074                                 "split on who is right, this is going to "
7075                                 "require user input to fix bytenr %Lu bytes "
7076                                 "%Lu\n", rec->start, rec->nr);
7077                         ret = -EINVAL;
7078                         goto out;
7079                 }
7080         }
7081
7082         /*
7083          * I don't think this can happen currently as we'll abort() if we catch
7084          * this case higher up, but in case somebody removes that we still can't
7085          * deal with it properly here yet, so just bail out of that's the case.
7086          */
7087         if (best->bytenr != rec->start) {
7088                 fprintf(stderr, "Extent start and backref starts don't match, "
7089                         "please use btrfs-image on this file system and send "
7090                         "it to a btrfs developer so they can make fsck fix "
7091                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7092                         rec->start, rec->nr);
7093                 ret = -EINVAL;
7094                 goto out;
7095         }
7096
7097         /*
7098          * Ok great we all agreed on an extent record, let's go find the real
7099          * references and fix up the ones that don't match.
7100          */
7101         rbtree_postorder_for_each_entry_safe(back, tmp,
7102                                              &rec->backref_tree, node) {
7103                 if (back->full_backref || !back->is_data)
7104                         continue;
7105
7106                 dback = to_data_backref(back);
7107
7108                 /*
7109                  * Still ignoring backrefs that don't have a real ref attached
7110                  * to them.
7111                  */
7112                 if (dback->found_ref == 0)
7113                         continue;
7114
7115                 if (dback->bytes == best->bytes &&
7116                     dback->disk_bytenr == best->bytenr)
7117                         continue;
7118
7119                 ret = repair_ref(info, path, dback, best);
7120                 if (ret)
7121                         goto out;
7122         }
7123
7124         /*
7125          * Ok we messed with the actual refs, which means we need to drop our
7126          * entire cache and go back and rescan.  I know this is a huge pain and
7127          * adds a lot of extra work, but it's the only way to be safe.  Once all
7128          * the backrefs agree we may not need to do anything to the extent
7129          * record itself.
7130          */
7131         ret = -EAGAIN;
7132 out:
7133         while (!list_empty(&entries)) {
7134                 entry = list_entry(entries.next, struct extent_entry, list);
7135                 list_del_init(&entry->list);
7136                 free(entry);
7137         }
7138         return ret;
7139 }
7140
7141 static int process_duplicates(struct btrfs_root *root,
7142                               struct cache_tree *extent_cache,
7143                               struct extent_record *rec)
7144 {
7145         struct extent_record *good, *tmp;
7146         struct cache_extent *cache;
7147         int ret;
7148
7149         /*
7150          * If we found a extent record for this extent then return, or if we
7151          * have more than one duplicate we are likely going to need to delete
7152          * something.
7153          */
7154         if (rec->found_rec || rec->num_duplicates > 1)
7155                 return 0;
7156
7157         /* Shouldn't happen but just in case */
7158         BUG_ON(!rec->num_duplicates);
7159
7160         /*
7161          * So this happens if we end up with a backref that doesn't match the
7162          * actual extent entry.  So either the backref is bad or the extent
7163          * entry is bad.  Either way we want to have the extent_record actually
7164          * reflect what we found in the extent_tree, so we need to take the
7165          * duplicate out and use that as the extent_record since the only way we
7166          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7167          */
7168         remove_cache_extent(extent_cache, &rec->cache);
7169
7170         good = to_extent_record(rec->dups.next);
7171         list_del_init(&good->list);
7172         INIT_LIST_HEAD(&good->backrefs);
7173         INIT_LIST_HEAD(&good->dups);
7174         good->cache.start = good->start;
7175         good->cache.size = good->nr;
7176         good->content_checked = 0;
7177         good->owner_ref_checked = 0;
7178         good->num_duplicates = 0;
7179         good->refs = rec->refs;
7180         list_splice_init(&rec->backrefs, &good->backrefs);
7181         while (1) {
7182                 cache = lookup_cache_extent(extent_cache, good->start,
7183                                             good->nr);
7184                 if (!cache)
7185                         break;
7186                 tmp = container_of(cache, struct extent_record, cache);
7187
7188                 /*
7189                  * If we find another overlapping extent and it's found_rec is
7190                  * set then it's a duplicate and we need to try and delete
7191                  * something.
7192                  */
7193                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7194                         if (list_empty(&good->list))
7195                                 list_add_tail(&good->list,
7196                                               &duplicate_extents);
7197                         good->num_duplicates += tmp->num_duplicates + 1;
7198                         list_splice_init(&tmp->dups, &good->dups);
7199                         list_del_init(&tmp->list);
7200                         list_add_tail(&tmp->list, &good->dups);
7201                         remove_cache_extent(extent_cache, &tmp->cache);
7202                         continue;
7203                 }
7204
7205                 /*
7206                  * Ok we have another non extent item backed extent rec, so lets
7207                  * just add it to this extent and carry on like we did above.
7208                  */
7209                 good->refs += tmp->refs;
7210                 list_splice_init(&tmp->backrefs, &good->backrefs);
7211                 remove_cache_extent(extent_cache, &tmp->cache);
7212                 free(tmp);
7213         }
7214         ret = insert_cache_extent(extent_cache, &good->cache);
7215         BUG_ON(ret);
7216         free(rec);
7217         return good->num_duplicates ? 0 : 1;
7218 }
7219
7220 static int delete_duplicate_records(struct btrfs_root *root,
7221                                     struct extent_record *rec)
7222 {
7223         struct btrfs_trans_handle *trans;
7224         LIST_HEAD(delete_list);
7225         struct btrfs_path *path;
7226         struct extent_record *tmp, *good, *n;
7227         int nr_del = 0;
7228         int ret = 0, err;
7229         struct btrfs_key key;
7230
7231         path = btrfs_alloc_path();
7232         if (!path) {
7233                 ret = -ENOMEM;
7234                 goto out;
7235         }
7236
7237         good = rec;
7238         /* Find the record that covers all of the duplicates. */
7239         list_for_each_entry(tmp, &rec->dups, list) {
7240                 if (good->start < tmp->start)
7241                         continue;
7242                 if (good->nr > tmp->nr)
7243                         continue;
7244
7245                 if (tmp->start + tmp->nr < good->start + good->nr) {
7246                         fprintf(stderr, "Ok we have overlapping extents that "
7247                                 "aren't completely covered by each other, this "
7248                                 "is going to require more careful thought.  "
7249                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7250                                 tmp->start, tmp->nr, good->start, good->nr);
7251                         abort();
7252                 }
7253                 good = tmp;
7254         }
7255
7256         if (good != rec)
7257                 list_add_tail(&rec->list, &delete_list);
7258
7259         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7260                 if (tmp == good)
7261                         continue;
7262                 list_move_tail(&tmp->list, &delete_list);
7263         }
7264
7265         root = root->fs_info->extent_root;
7266         trans = btrfs_start_transaction(root, 1);
7267         if (IS_ERR(trans)) {
7268                 ret = PTR_ERR(trans);
7269                 goto out;
7270         }
7271
7272         list_for_each_entry(tmp, &delete_list, list) {
7273                 if (tmp->found_rec == 0)
7274                         continue;
7275                 key.objectid = tmp->start;
7276                 key.type = BTRFS_EXTENT_ITEM_KEY;
7277                 key.offset = tmp->nr;
7278
7279                 /* Shouldn't happen but just in case */
7280                 if (tmp->metadata) {
7281                         fprintf(stderr, "Well this shouldn't happen, extent "
7282                                 "record overlaps but is metadata? "
7283                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7284                         abort();
7285                 }
7286
7287                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7288                 if (ret) {
7289                         if (ret > 0)
7290                                 ret = -EINVAL;
7291                         break;
7292                 }
7293                 ret = btrfs_del_item(trans, root, path);
7294                 if (ret)
7295                         break;
7296                 btrfs_release_path(path);
7297                 nr_del++;
7298         }
7299         err = btrfs_commit_transaction(trans, root);
7300         if (err && !ret)
7301                 ret = err;
7302 out:
7303         while (!list_empty(&delete_list)) {
7304                 tmp = to_extent_record(delete_list.next);
7305                 list_del_init(&tmp->list);
7306                 if (tmp == rec)
7307                         continue;
7308                 free(tmp);
7309         }
7310
7311         while (!list_empty(&rec->dups)) {
7312                 tmp = to_extent_record(rec->dups.next);
7313                 list_del_init(&tmp->list);
7314                 free(tmp);
7315         }
7316
7317         btrfs_free_path(path);
7318
7319         if (!ret && !nr_del)
7320                 rec->num_duplicates = 0;
7321
7322         return ret ? ret : nr_del;
7323 }
7324
7325 static int find_possible_backrefs(struct btrfs_fs_info *info,
7326                                   struct btrfs_path *path,
7327                                   struct cache_tree *extent_cache,
7328                                   struct extent_record *rec)
7329 {
7330         struct btrfs_root *root;
7331         struct extent_backref *back, *tmp;
7332         struct data_backref *dback;
7333         struct cache_extent *cache;
7334         struct btrfs_file_extent_item *fi;
7335         struct btrfs_key key;
7336         u64 bytenr, bytes;
7337         int ret;
7338
7339         rbtree_postorder_for_each_entry_safe(back, tmp,
7340                                              &rec->backref_tree, node) {
7341                 /* Don't care about full backrefs (poor unloved backrefs) */
7342                 if (back->full_backref || !back->is_data)
7343                         continue;
7344
7345                 dback = to_data_backref(back);
7346
7347                 /* We found this one, we don't need to do a lookup */
7348                 if (dback->found_ref)
7349                         continue;
7350
7351                 key.objectid = dback->root;
7352                 key.type = BTRFS_ROOT_ITEM_KEY;
7353                 key.offset = (u64)-1;
7354
7355                 root = btrfs_read_fs_root(info, &key);
7356
7357                 /* No root, definitely a bad ref, skip */
7358                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7359                         continue;
7360                 /* Other err, exit */
7361                 if (IS_ERR(root))
7362                         return PTR_ERR(root);
7363
7364                 key.objectid = dback->owner;
7365                 key.type = BTRFS_EXTENT_DATA_KEY;
7366                 key.offset = dback->offset;
7367                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7368                 if (ret) {
7369                         btrfs_release_path(path);
7370                         if (ret < 0)
7371                                 return ret;
7372                         /* Didn't find it, we can carry on */
7373                         ret = 0;
7374                         continue;
7375                 }
7376
7377                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7378                                     struct btrfs_file_extent_item);
7379                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7380                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7381                 btrfs_release_path(path);
7382                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7383                 if (cache) {
7384                         struct extent_record *tmp;
7385                         tmp = container_of(cache, struct extent_record, cache);
7386
7387                         /*
7388                          * If we found an extent record for the bytenr for this
7389                          * particular backref then we can't add it to our
7390                          * current extent record.  We only want to add backrefs
7391                          * that don't have a corresponding extent item in the
7392                          * extent tree since they likely belong to this record
7393                          * and we need to fix it if it doesn't match bytenrs.
7394                          */
7395                         if  (tmp->found_rec)
7396                                 continue;
7397                 }
7398
7399                 dback->found_ref += 1;
7400                 dback->disk_bytenr = bytenr;
7401                 dback->bytes = bytes;
7402
7403                 /*
7404                  * Set this so the verify backref code knows not to trust the
7405                  * values in this backref.
7406                  */
7407                 back->broken = 1;
7408         }
7409
7410         return 0;
7411 }
7412
7413 /*
7414  * Record orphan data ref into corresponding root.
7415  *
7416  * Return 0 if the extent item contains data ref and recorded.
7417  * Return 1 if the extent item contains no useful data ref
7418  *   On that case, it may contains only shared_dataref or metadata backref
7419  *   or the file extent exists(this should be handled by the extent bytenr
7420  *   recovery routine)
7421  * Return <0 if something goes wrong.
7422  */
7423 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7424                                       struct extent_record *rec)
7425 {
7426         struct btrfs_key key;
7427         struct btrfs_root *dest_root;
7428         struct extent_backref *back, *tmp;
7429         struct data_backref *dback;
7430         struct orphan_data_extent *orphan;
7431         struct btrfs_path *path;
7432         int recorded_data_ref = 0;
7433         int ret = 0;
7434
7435         if (rec->metadata)
7436                 return 1;
7437         path = btrfs_alloc_path();
7438         if (!path)
7439                 return -ENOMEM;
7440         rbtree_postorder_for_each_entry_safe(back, tmp,
7441                                              &rec->backref_tree, node) {
7442                 if (back->full_backref || !back->is_data ||
7443                     !back->found_extent_tree)
7444                         continue;
7445                 dback = to_data_backref(back);
7446                 if (dback->found_ref)
7447                         continue;
7448                 key.objectid = dback->root;
7449                 key.type = BTRFS_ROOT_ITEM_KEY;
7450                 key.offset = (u64)-1;
7451
7452                 dest_root = btrfs_read_fs_root(fs_info, &key);
7453
7454                 /* For non-exist root we just skip it */
7455                 if (IS_ERR(dest_root) || !dest_root)
7456                         continue;
7457
7458                 key.objectid = dback->owner;
7459                 key.type = BTRFS_EXTENT_DATA_KEY;
7460                 key.offset = dback->offset;
7461
7462                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7463                 /*
7464                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7465                  * we need to record it for inode/file extent rebuild.
7466                  * For ret > 0, we record it only for file extent rebuild.
7467                  * For ret == 0, the file extent exists but only bytenr
7468                  * mismatch, let the original bytenr fix routine to handle,
7469                  * don't record it.
7470                  */
7471                 if (ret == 0)
7472                         continue;
7473                 ret = 0;
7474                 orphan = malloc(sizeof(*orphan));
7475                 if (!orphan) {
7476                         ret = -ENOMEM;
7477                         goto out;
7478                 }
7479                 INIT_LIST_HEAD(&orphan->list);
7480                 orphan->root = dback->root;
7481                 orphan->objectid = dback->owner;
7482                 orphan->offset = dback->offset;
7483                 orphan->disk_bytenr = rec->cache.start;
7484                 orphan->disk_len = rec->cache.size;
7485                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7486                 recorded_data_ref = 1;
7487         }
7488 out:
7489         btrfs_free_path(path);
7490         if (!ret)
7491                 return !recorded_data_ref;
7492         else
7493                 return ret;
7494 }
7495
7496 /*
7497  * when an incorrect extent item is found, this will delete
7498  * all of the existing entries for it and recreate them
7499  * based on what the tree scan found.
7500  */
7501 static int fixup_extent_refs(struct btrfs_fs_info *info,
7502                              struct cache_tree *extent_cache,
7503                              struct extent_record *rec)
7504 {
7505         struct btrfs_trans_handle *trans = NULL;
7506         int ret;
7507         struct btrfs_path *path;
7508         struct cache_extent *cache;
7509         struct extent_backref *back, *tmp;
7510         int allocated = 0;
7511         u64 flags = 0;
7512
7513         if (rec->flag_block_full_backref)
7514                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7515
7516         path = btrfs_alloc_path();
7517         if (!path)
7518                 return -ENOMEM;
7519
7520         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7521                 /*
7522                  * Sometimes the backrefs themselves are so broken they don't
7523                  * get attached to any meaningful rec, so first go back and
7524                  * check any of our backrefs that we couldn't find and throw
7525                  * them into the list if we find the backref so that
7526                  * verify_backrefs can figure out what to do.
7527                  */
7528                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7529                 if (ret < 0)
7530                         goto out;
7531         }
7532
7533         /* step one, make sure all of the backrefs agree */
7534         ret = verify_backrefs(info, path, rec);
7535         if (ret < 0)
7536                 goto out;
7537
7538         trans = btrfs_start_transaction(info->extent_root, 1);
7539         if (IS_ERR(trans)) {
7540                 ret = PTR_ERR(trans);
7541                 goto out;
7542         }
7543
7544         /* step two, delete all the existing records */
7545         ret = delete_extent_records(trans, info->extent_root, path,
7546                                     rec->start, rec->max_size);
7547
7548         if (ret < 0)
7549                 goto out;
7550
7551         /* was this block corrupt?  If so, don't add references to it */
7552         cache = lookup_cache_extent(info->corrupt_blocks,
7553                                     rec->start, rec->max_size);
7554         if (cache) {
7555                 ret = 0;
7556                 goto out;
7557         }
7558
7559         /* step three, recreate all the refs we did find */
7560         rbtree_postorder_for_each_entry_safe(back, tmp,
7561                                              &rec->backref_tree, node) {
7562                 /*
7563                  * if we didn't find any references, don't create a
7564                  * new extent record
7565                  */
7566                 if (!back->found_ref)
7567                         continue;
7568
7569                 rec->bad_full_backref = 0;
7570                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7571                 allocated = 1;
7572
7573                 if (ret)
7574                         goto out;
7575         }
7576 out:
7577         if (trans) {
7578                 int err = btrfs_commit_transaction(trans, info->extent_root);
7579                 if (!ret)
7580                         ret = err;
7581         }
7582
7583         btrfs_free_path(path);
7584         return ret;
7585 }
7586
7587 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7588                               struct extent_record *rec)
7589 {
7590         struct btrfs_trans_handle *trans;
7591         struct btrfs_root *root = fs_info->extent_root;
7592         struct btrfs_path *path;
7593         struct btrfs_extent_item *ei;
7594         struct btrfs_key key;
7595         u64 flags;
7596         int ret = 0;
7597
7598         key.objectid = rec->start;
7599         if (rec->metadata) {
7600                 key.type = BTRFS_METADATA_ITEM_KEY;
7601                 key.offset = rec->info_level;
7602         } else {
7603                 key.type = BTRFS_EXTENT_ITEM_KEY;
7604                 key.offset = rec->max_size;
7605         }
7606
7607         path = btrfs_alloc_path();
7608         if (!path)
7609                 return -ENOMEM;
7610
7611         trans = btrfs_start_transaction(root, 0);
7612         if (IS_ERR(trans)) {
7613                 btrfs_free_path(path);
7614                 return PTR_ERR(trans);
7615         }
7616
7617         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7618         if (ret < 0) {
7619                 btrfs_free_path(path);
7620                 btrfs_commit_transaction(trans, root);
7621                 return ret;
7622         } else if (ret) {
7623                 fprintf(stderr, "Didn't find extent for %llu\n",
7624                         (unsigned long long)rec->start);
7625                 btrfs_free_path(path);
7626                 btrfs_commit_transaction(trans, root);
7627                 return -ENOENT;
7628         }
7629
7630         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7631                             struct btrfs_extent_item);
7632         flags = btrfs_extent_flags(path->nodes[0], ei);
7633         if (rec->flag_block_full_backref) {
7634                 fprintf(stderr, "setting full backref on %llu\n",
7635                         (unsigned long long)key.objectid);
7636                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7637         } else {
7638                 fprintf(stderr, "clearing full backref on %llu\n",
7639                         (unsigned long long)key.objectid);
7640                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7641         }
7642         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7643         btrfs_mark_buffer_dirty(path->nodes[0]);
7644         btrfs_free_path(path);
7645         return btrfs_commit_transaction(trans, root);
7646 }
7647
7648 /* right now we only prune from the extent allocation tree */
7649 static int prune_one_block(struct btrfs_trans_handle *trans,
7650                            struct btrfs_fs_info *info,
7651                            struct btrfs_corrupt_block *corrupt)
7652 {
7653         int ret;
7654         struct btrfs_path path;
7655         struct extent_buffer *eb;
7656         u64 found;
7657         int slot;
7658         int nritems;
7659         int level = corrupt->level + 1;
7660
7661         btrfs_init_path(&path);
7662 again:
7663         /* we want to stop at the parent to our busted block */
7664         path.lowest_level = level;
7665
7666         ret = btrfs_search_slot(trans, info->extent_root,
7667                                 &corrupt->key, &path, -1, 1);
7668
7669         if (ret < 0)
7670                 goto out;
7671
7672         eb = path.nodes[level];
7673         if (!eb) {
7674                 ret = -ENOENT;
7675                 goto out;
7676         }
7677
7678         /*
7679          * hopefully the search gave us the block we want to prune,
7680          * lets try that first
7681          */
7682         slot = path.slots[level];
7683         found =  btrfs_node_blockptr(eb, slot);
7684         if (found == corrupt->cache.start)
7685                 goto del_ptr;
7686
7687         nritems = btrfs_header_nritems(eb);
7688
7689         /* the search failed, lets scan this node and hope we find it */
7690         for (slot = 0; slot < nritems; slot++) {
7691                 found =  btrfs_node_blockptr(eb, slot);
7692                 if (found == corrupt->cache.start)
7693                         goto del_ptr;
7694         }
7695         /*
7696          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7697          * to this block
7698          */
7699         if (eb == info->extent_root->node) {
7700                 ret = -ENOENT;
7701                 goto out;
7702         } else {
7703                 level++;
7704                 btrfs_release_path(&path);
7705                 goto again;
7706         }
7707
7708 del_ptr:
7709         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7710         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7711
7712 out:
7713         btrfs_release_path(&path);
7714         return ret;
7715 }
7716
7717 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7718 {
7719         struct btrfs_trans_handle *trans = NULL;
7720         struct cache_extent *cache;
7721         struct btrfs_corrupt_block *corrupt;
7722
7723         while (1) {
7724                 cache = search_cache_extent(info->corrupt_blocks, 0);
7725                 if (!cache)
7726                         break;
7727                 if (!trans) {
7728                         trans = btrfs_start_transaction(info->extent_root, 1);
7729                         if (IS_ERR(trans))
7730                                 return PTR_ERR(trans);
7731                 }
7732                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7733                 prune_one_block(trans, info, corrupt);
7734                 remove_cache_extent(info->corrupt_blocks, cache);
7735         }
7736         if (trans)
7737                 return btrfs_commit_transaction(trans, info->extent_root);
7738         return 0;
7739 }
7740
7741 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7742 {
7743         struct btrfs_block_group_cache *cache;
7744         u64 start, end;
7745         int ret;
7746
7747         while (1) {
7748                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7749                                             &start, &end, EXTENT_DIRTY);
7750                 if (ret)
7751                         break;
7752                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7753                                    GFP_NOFS);
7754         }
7755
7756         start = 0;
7757         while (1) {
7758                 cache = btrfs_lookup_first_block_group(fs_info, start);
7759                 if (!cache)
7760                         break;
7761                 if (cache->cached)
7762                         cache->cached = 0;
7763                 start = cache->key.objectid + cache->key.offset;
7764         }
7765 }
7766
7767 static int check_extent_refs(struct btrfs_root *root,
7768                              struct cache_tree *extent_cache)
7769 {
7770         struct extent_record *rec;
7771         struct cache_extent *cache;
7772         int err = 0;
7773         int ret = 0;
7774         int fixed = 0;
7775         int had_dups = 0;
7776         int recorded = 0;
7777
7778         if (repair) {
7779                 /*
7780                  * if we're doing a repair, we have to make sure
7781                  * we don't allocate from the problem extents.
7782                  * In the worst case, this will be all the
7783                  * extents in the FS
7784                  */
7785                 cache = search_cache_extent(extent_cache, 0);
7786                 while(cache) {
7787                         rec = container_of(cache, struct extent_record, cache);
7788                         set_extent_dirty(root->fs_info->excluded_extents,
7789                                          rec->start,
7790                                          rec->start + rec->max_size - 1,
7791                                          GFP_NOFS);
7792                         cache = next_cache_extent(cache);
7793                 }
7794
7795                 /* pin down all the corrupted blocks too */
7796                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7797                 while(cache) {
7798                         set_extent_dirty(root->fs_info->excluded_extents,
7799                                          cache->start,
7800                                          cache->start + cache->size - 1,
7801                                          GFP_NOFS);
7802                         cache = next_cache_extent(cache);
7803                 }
7804                 prune_corrupt_blocks(root->fs_info);
7805                 reset_cached_block_groups(root->fs_info);
7806         }
7807
7808         reset_cached_block_groups(root->fs_info);
7809
7810         /*
7811          * We need to delete any duplicate entries we find first otherwise we
7812          * could mess up the extent tree when we have backrefs that actually
7813          * belong to a different extent item and not the weird duplicate one.
7814          */
7815         while (repair && !list_empty(&duplicate_extents)) {
7816                 rec = to_extent_record(duplicate_extents.next);
7817                 list_del_init(&rec->list);
7818
7819                 /* Sometimes we can find a backref before we find an actual
7820                  * extent, so we need to process it a little bit to see if there
7821                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7822                  * if this is a backref screwup.  If we need to delete stuff
7823                  * process_duplicates() will return 0, otherwise it will return
7824                  * 1 and we
7825                  */
7826                 if (process_duplicates(root, extent_cache, rec))
7827                         continue;
7828                 ret = delete_duplicate_records(root, rec);
7829                 if (ret < 0)
7830                         return ret;
7831                 /*
7832                  * delete_duplicate_records will return the number of entries
7833                  * deleted, so if it's greater than 0 then we know we actually
7834                  * did something and we need to remove.
7835                  */
7836                 if (ret)
7837                         had_dups = 1;
7838         }
7839
7840         if (had_dups)
7841                 return -EAGAIN;
7842
7843         while(1) {
7844                 int cur_err = 0;
7845
7846                 fixed = 0;
7847                 recorded = 0;
7848                 cache = search_cache_extent(extent_cache, 0);
7849                 if (!cache)
7850                         break;
7851                 rec = container_of(cache, struct extent_record, cache);
7852                 if (rec->num_duplicates) {
7853                         fprintf(stderr, "extent item %llu has multiple extent "
7854                                 "items\n", (unsigned long long)rec->start);
7855                         err = 1;
7856                         cur_err = 1;
7857                 }
7858
7859                 if (rec->refs != rec->extent_item_refs) {
7860                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7861                                 (unsigned long long)rec->start,
7862                                 (unsigned long long)rec->nr);
7863                         fprintf(stderr, "extent item %llu, found %llu\n",
7864                                 (unsigned long long)rec->extent_item_refs,
7865                                 (unsigned long long)rec->refs);
7866                         ret = record_orphan_data_extents(root->fs_info, rec);
7867                         if (ret < 0)
7868                                 goto repair_abort;
7869                         if (ret == 0) {
7870                                 recorded = 1;
7871                         } else {
7872                                 /*
7873                                  * we can't use the extent to repair file
7874                                  * extent, let the fallback method handle it.
7875                                  */
7876                                 if (!fixed && repair) {
7877                                         ret = fixup_extent_refs(
7878                                                         root->fs_info,
7879                                                         extent_cache, rec);
7880                                         if (ret)
7881                                                 goto repair_abort;
7882                                         fixed = 1;
7883                                 }
7884                         }
7885                         err = 1;
7886                         cur_err = 1;
7887                 }
7888                 if (all_backpointers_checked(rec, 1)) {
7889                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7890                                 (unsigned long long)rec->start,
7891                                 (unsigned long long)rec->nr);
7892
7893                         if (!fixed && !recorded && repair) {
7894                                 ret = fixup_extent_refs(root->fs_info,
7895                                                         extent_cache, rec);
7896                                 if (ret)
7897                                         goto repair_abort;
7898                                 fixed = 1;
7899                         }
7900                         cur_err = 1;
7901                         err = 1;
7902                 }
7903                 if (!rec->owner_ref_checked) {
7904                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7905                                 (unsigned long long)rec->start,
7906                                 (unsigned long long)rec->nr);
7907                         if (!fixed && !recorded && repair) {
7908                                 ret = fixup_extent_refs(root->fs_info,
7909                                                         extent_cache, rec);
7910                                 if (ret)
7911                                         goto repair_abort;
7912                                 fixed = 1;
7913                         }
7914                         err = 1;
7915                         cur_err = 1;
7916                 }
7917                 if (rec->bad_full_backref) {
7918                         fprintf(stderr, "bad full backref, on [%llu]\n",
7919                                 (unsigned long long)rec->start);
7920                         if (repair) {
7921                                 ret = fixup_extent_flags(root->fs_info, rec);
7922                                 if (ret)
7923                                         goto repair_abort;
7924                                 fixed = 1;
7925                         }
7926                         err = 1;
7927                         cur_err = 1;
7928                 }
7929                 /*
7930                  * Although it's not a extent ref's problem, we reuse this
7931                  * routine for error reporting.
7932                  * No repair function yet.
7933                  */
7934                 if (rec->crossing_stripes) {
7935                         fprintf(stderr,
7936                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7937                                 rec->start, rec->start + rec->max_size);
7938                         err = 1;
7939                         cur_err = 1;
7940                 }
7941
7942                 if (rec->wrong_chunk_type) {
7943                         fprintf(stderr,
7944                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7945                                 rec->start, rec->start + rec->max_size);
7946                         err = 1;
7947                         cur_err = 1;
7948                 }
7949
7950                 remove_cache_extent(extent_cache, cache);
7951                 free_all_extent_backrefs(rec);
7952                 if (!init_extent_tree && repair && (!cur_err || fixed))
7953                         clear_extent_dirty(root->fs_info->excluded_extents,
7954                                            rec->start,
7955                                            rec->start + rec->max_size - 1,
7956                                            GFP_NOFS);
7957                 free(rec);
7958         }
7959 repair_abort:
7960         if (repair) {
7961                 if (ret && ret != -EAGAIN) {
7962                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7963                         exit(1);
7964                 } else if (!ret) {
7965                         struct btrfs_trans_handle *trans;
7966
7967                         root = root->fs_info->extent_root;
7968                         trans = btrfs_start_transaction(root, 1);
7969                         if (IS_ERR(trans)) {
7970                                 ret = PTR_ERR(trans);
7971                                 goto repair_abort;
7972                         }
7973
7974                         btrfs_fix_block_accounting(trans, root);
7975                         ret = btrfs_commit_transaction(trans, root);
7976                         if (ret)
7977                                 goto repair_abort;
7978                 }
7979                 if (err)
7980                         fprintf(stderr, "repaired damaged extent references\n");
7981                 return ret;
7982         }
7983         return err;
7984 }
7985
7986 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7987 {
7988         u64 stripe_size;
7989
7990         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7991                 stripe_size = length;
7992                 stripe_size /= num_stripes;
7993         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7994                 stripe_size = length * 2;
7995                 stripe_size /= num_stripes;
7996         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7997                 stripe_size = length;
7998                 stripe_size /= (num_stripes - 1);
7999         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8000                 stripe_size = length;
8001                 stripe_size /= (num_stripes - 2);
8002         } else {
8003                 stripe_size = length;
8004         }
8005         return stripe_size;
8006 }
8007
8008 /*
8009  * Check the chunk with its block group/dev list ref:
8010  * Return 0 if all refs seems valid.
8011  * Return 1 if part of refs seems valid, need later check for rebuild ref
8012  * like missing block group and needs to search extent tree to rebuild them.
8013  * Return -1 if essential refs are missing and unable to rebuild.
8014  */
8015 static int check_chunk_refs(struct chunk_record *chunk_rec,
8016                             struct block_group_tree *block_group_cache,
8017                             struct device_extent_tree *dev_extent_cache,
8018                             int silent)
8019 {
8020         struct cache_extent *block_group_item;
8021         struct block_group_record *block_group_rec;
8022         struct cache_extent *dev_extent_item;
8023         struct device_extent_record *dev_extent_rec;
8024         u64 devid;
8025         u64 offset;
8026         u64 length;
8027         int metadump_v2 = 0;
8028         int i;
8029         int ret = 0;
8030
8031         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8032                                                chunk_rec->offset,
8033                                                chunk_rec->length);
8034         if (block_group_item) {
8035                 block_group_rec = container_of(block_group_item,
8036                                                struct block_group_record,
8037                                                cache);
8038                 if (chunk_rec->length != block_group_rec->offset ||
8039                     chunk_rec->offset != block_group_rec->objectid ||
8040                     (!metadump_v2 &&
8041                      chunk_rec->type_flags != block_group_rec->flags)) {
8042                         if (!silent)
8043                                 fprintf(stderr,
8044                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8045                                         chunk_rec->objectid,
8046                                         chunk_rec->type,
8047                                         chunk_rec->offset,
8048                                         chunk_rec->length,
8049                                         chunk_rec->offset,
8050                                         chunk_rec->type_flags,
8051                                         block_group_rec->objectid,
8052                                         block_group_rec->type,
8053                                         block_group_rec->offset,
8054                                         block_group_rec->offset,
8055                                         block_group_rec->objectid,
8056                                         block_group_rec->flags);
8057                         ret = -1;
8058                 } else {
8059                         list_del_init(&block_group_rec->list);
8060                         chunk_rec->bg_rec = block_group_rec;
8061                 }
8062         } else {
8063                 if (!silent)
8064                         fprintf(stderr,
8065                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8066                                 chunk_rec->objectid,
8067                                 chunk_rec->type,
8068                                 chunk_rec->offset,
8069                                 chunk_rec->length,
8070                                 chunk_rec->offset,
8071                                 chunk_rec->type_flags);
8072                 ret = 1;
8073         }
8074
8075         if (metadump_v2)
8076                 return ret;
8077
8078         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8079                                     chunk_rec->num_stripes);
8080         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8081                 devid = chunk_rec->stripes[i].devid;
8082                 offset = chunk_rec->stripes[i].offset;
8083                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8084                                                        devid, offset, length);
8085                 if (dev_extent_item) {
8086                         dev_extent_rec = container_of(dev_extent_item,
8087                                                 struct device_extent_record,
8088                                                 cache);
8089                         if (dev_extent_rec->objectid != devid ||
8090                             dev_extent_rec->offset != offset ||
8091                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8092                             dev_extent_rec->length != length) {
8093                                 if (!silent)
8094                                         fprintf(stderr,
8095                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8096                                                 chunk_rec->objectid,
8097                                                 chunk_rec->type,
8098                                                 chunk_rec->offset,
8099                                                 chunk_rec->stripes[i].devid,
8100                                                 chunk_rec->stripes[i].offset,
8101                                                 dev_extent_rec->objectid,
8102                                                 dev_extent_rec->offset,
8103                                                 dev_extent_rec->length);
8104                                 ret = -1;
8105                         } else {
8106                                 list_move(&dev_extent_rec->chunk_list,
8107                                           &chunk_rec->dextents);
8108                         }
8109                 } else {
8110                         if (!silent)
8111                                 fprintf(stderr,
8112                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8113                                         chunk_rec->objectid,
8114                                         chunk_rec->type,
8115                                         chunk_rec->offset,
8116                                         chunk_rec->stripes[i].devid,
8117                                         chunk_rec->stripes[i].offset);
8118                         ret = -1;
8119                 }
8120         }
8121         return ret;
8122 }
8123
8124 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8125 int check_chunks(struct cache_tree *chunk_cache,
8126                  struct block_group_tree *block_group_cache,
8127                  struct device_extent_tree *dev_extent_cache,
8128                  struct list_head *good, struct list_head *bad,
8129                  struct list_head *rebuild, int silent)
8130 {
8131         struct cache_extent *chunk_item;
8132         struct chunk_record *chunk_rec;
8133         struct block_group_record *bg_rec;
8134         struct device_extent_record *dext_rec;
8135         int err;
8136         int ret = 0;
8137
8138         chunk_item = first_cache_extent(chunk_cache);
8139         while (chunk_item) {
8140                 chunk_rec = container_of(chunk_item, struct chunk_record,
8141                                          cache);
8142                 err = check_chunk_refs(chunk_rec, block_group_cache,
8143                                        dev_extent_cache, silent);
8144                 if (err < 0)
8145                         ret = err;
8146                 if (err == 0 && good)
8147                         list_add_tail(&chunk_rec->list, good);
8148                 if (err > 0 && rebuild)
8149                         list_add_tail(&chunk_rec->list, rebuild);
8150                 if (err < 0 && bad)
8151                         list_add_tail(&chunk_rec->list, bad);
8152                 chunk_item = next_cache_extent(chunk_item);
8153         }
8154
8155         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8156                 if (!silent)
8157                         fprintf(stderr,
8158                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8159                                 bg_rec->objectid,
8160                                 bg_rec->offset,
8161                                 bg_rec->flags);
8162                 if (!ret)
8163                         ret = 1;
8164         }
8165
8166         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8167                             chunk_list) {
8168                 if (!silent)
8169                         fprintf(stderr,
8170                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8171                                 dext_rec->objectid,
8172                                 dext_rec->offset,
8173                                 dext_rec->length);
8174                 if (!ret)
8175                         ret = 1;
8176         }
8177         return ret;
8178 }
8179
8180
8181 static int check_device_used(struct device_record *dev_rec,
8182                              struct device_extent_tree *dext_cache)
8183 {
8184         struct cache_extent *cache;
8185         struct device_extent_record *dev_extent_rec;
8186         u64 total_byte = 0;
8187
8188         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8189         while (cache) {
8190                 dev_extent_rec = container_of(cache,
8191                                               struct device_extent_record,
8192                                               cache);
8193                 if (dev_extent_rec->objectid != dev_rec->devid)
8194                         break;
8195
8196                 list_del_init(&dev_extent_rec->device_list);
8197                 total_byte += dev_extent_rec->length;
8198                 cache = next_cache_extent(cache);
8199         }
8200
8201         if (total_byte != dev_rec->byte_used) {
8202                 fprintf(stderr,
8203                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8204                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8205                         dev_rec->type, dev_rec->offset);
8206                 return -1;
8207         } else {
8208                 return 0;
8209         }
8210 }
8211
8212 /* check btrfs_dev_item -> btrfs_dev_extent */
8213 static int check_devices(struct rb_root *dev_cache,
8214                          struct device_extent_tree *dev_extent_cache)
8215 {
8216         struct rb_node *dev_node;
8217         struct device_record *dev_rec;
8218         struct device_extent_record *dext_rec;
8219         int err;
8220         int ret = 0;
8221
8222         dev_node = rb_first(dev_cache);
8223         while (dev_node) {
8224                 dev_rec = container_of(dev_node, struct device_record, node);
8225                 err = check_device_used(dev_rec, dev_extent_cache);
8226                 if (err)
8227                         ret = err;
8228
8229                 dev_node = rb_next(dev_node);
8230         }
8231         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8232                             device_list) {
8233                 fprintf(stderr,
8234                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8235                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8236                 if (!ret)
8237                         ret = 1;
8238         }
8239         return ret;
8240 }
8241
8242 static int add_root_item_to_list(struct list_head *head,
8243                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8244                                   u8 level, u8 drop_level,
8245                                   int level_size, struct btrfs_key *drop_key)
8246 {
8247
8248         struct root_item_record *ri_rec;
8249         ri_rec = malloc(sizeof(*ri_rec));
8250         if (!ri_rec)
8251                 return -ENOMEM;
8252         ri_rec->bytenr = bytenr;
8253         ri_rec->objectid = objectid;
8254         ri_rec->level = level;
8255         ri_rec->level_size = level_size;
8256         ri_rec->drop_level = drop_level;
8257         ri_rec->last_snapshot = last_snapshot;
8258         if (drop_key)
8259                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8260         list_add_tail(&ri_rec->list, head);
8261
8262         return 0;
8263 }
8264
8265 static void free_root_item_list(struct list_head *list)
8266 {
8267         struct root_item_record *ri_rec;
8268
8269         while (!list_empty(list)) {
8270                 ri_rec = list_first_entry(list, struct root_item_record,
8271                                           list);
8272                 list_del_init(&ri_rec->list);
8273                 free(ri_rec);
8274         }
8275 }
8276
8277 static int deal_root_from_list(struct list_head *list,
8278                                struct btrfs_root *root,
8279                                struct block_info *bits,
8280                                int bits_nr,
8281                                struct cache_tree *pending,
8282                                struct cache_tree *seen,
8283                                struct cache_tree *reada,
8284                                struct cache_tree *nodes,
8285                                struct cache_tree *extent_cache,
8286                                struct cache_tree *chunk_cache,
8287                                struct rb_root *dev_cache,
8288                                struct block_group_tree *block_group_cache,
8289                                struct device_extent_tree *dev_extent_cache)
8290 {
8291         int ret = 0;
8292         u64 last;
8293
8294         while (!list_empty(list)) {
8295                 struct root_item_record *rec;
8296                 struct extent_buffer *buf;
8297                 rec = list_entry(list->next,
8298                                  struct root_item_record, list);
8299                 last = 0;
8300                 buf = read_tree_block(root->fs_info->tree_root,
8301                                       rec->bytenr, rec->level_size, 0);
8302                 if (!extent_buffer_uptodate(buf)) {
8303                         free_extent_buffer(buf);
8304                         ret = -EIO;
8305                         break;
8306                 }
8307                 add_root_to_pending(buf, extent_cache, pending,
8308                                     seen, nodes, rec->objectid);
8309                 /*
8310                  * To rebuild extent tree, we need deal with snapshot
8311                  * one by one, otherwise we deal with node firstly which
8312                  * can maximize readahead.
8313                  */
8314                 while (1) {
8315                         ret = run_next_block(root, bits, bits_nr, &last,
8316                                              pending, seen, reada, nodes,
8317                                              extent_cache, chunk_cache,
8318                                              dev_cache, block_group_cache,
8319                                              dev_extent_cache, rec);
8320                         if (ret != 0)
8321                                 break;
8322                 }
8323                 free_extent_buffer(buf);
8324                 list_del(&rec->list);
8325                 free(rec);
8326                 if (ret < 0)
8327                         break;
8328         }
8329         while (ret >= 0) {
8330                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8331                                      reada, nodes, extent_cache, chunk_cache,
8332                                      dev_cache, block_group_cache,
8333                                      dev_extent_cache, NULL);
8334                 if (ret != 0) {
8335                         if (ret > 0)
8336                                 ret = 0;
8337                         break;
8338                 }
8339         }
8340         return ret;
8341 }
8342
8343 static int check_chunks_and_extents(struct btrfs_root *root)
8344 {
8345         struct rb_root dev_cache;
8346         struct cache_tree chunk_cache;
8347         struct block_group_tree block_group_cache;
8348         struct device_extent_tree dev_extent_cache;
8349         struct cache_tree extent_cache;
8350         struct cache_tree seen;
8351         struct cache_tree pending;
8352         struct cache_tree reada;
8353         struct cache_tree nodes;
8354         struct extent_io_tree excluded_extents;
8355         struct cache_tree corrupt_blocks;
8356         struct btrfs_path path;
8357         struct btrfs_key key;
8358         struct btrfs_key found_key;
8359         int ret, err = 0;
8360         struct block_info *bits;
8361         int bits_nr;
8362         struct extent_buffer *leaf;
8363         int slot;
8364         struct btrfs_root_item ri;
8365         struct list_head dropping_trees;
8366         struct list_head normal_trees;
8367         struct btrfs_root *root1;
8368         u64 objectid;
8369         u32 level_size;
8370         u8 level;
8371
8372         dev_cache = RB_ROOT;
8373         cache_tree_init(&chunk_cache);
8374         block_group_tree_init(&block_group_cache);
8375         device_extent_tree_init(&dev_extent_cache);
8376
8377         cache_tree_init(&extent_cache);
8378         cache_tree_init(&seen);
8379         cache_tree_init(&pending);
8380         cache_tree_init(&nodes);
8381         cache_tree_init(&reada);
8382         cache_tree_init(&corrupt_blocks);
8383         extent_io_tree_init(&excluded_extents);
8384         INIT_LIST_HEAD(&dropping_trees);
8385         INIT_LIST_HEAD(&normal_trees);
8386
8387         if (repair) {
8388                 root->fs_info->excluded_extents = &excluded_extents;
8389                 root->fs_info->fsck_extent_cache = &extent_cache;
8390                 root->fs_info->free_extent_hook = free_extent_hook;
8391                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8392         }
8393
8394         bits_nr = 1024;
8395         bits = malloc(bits_nr * sizeof(struct block_info));
8396         if (!bits) {
8397                 perror("malloc");
8398                 exit(1);
8399         }
8400
8401         if (ctx.progress_enabled) {
8402                 ctx.tp = TASK_EXTENTS;
8403                 task_start(ctx.info);
8404         }
8405
8406 again:
8407         root1 = root->fs_info->tree_root;
8408         level = btrfs_header_level(root1->node);
8409         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8410                                     root1->node->start, 0, level, 0,
8411                                     root1->nodesize, NULL);
8412         if (ret < 0)
8413                 goto out;
8414         root1 = root->fs_info->chunk_root;
8415         level = btrfs_header_level(root1->node);
8416         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8417                                     root1->node->start, 0, level, 0,
8418                                     root1->nodesize, NULL);
8419         if (ret < 0)
8420                 goto out;
8421         btrfs_init_path(&path);
8422         key.offset = 0;
8423         key.objectid = 0;
8424         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8425         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8426                                         &key, &path, 0, 0);
8427         if (ret < 0)
8428                 goto out;
8429         while(1) {
8430                 leaf = path.nodes[0];
8431                 slot = path.slots[0];
8432                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8433                         ret = btrfs_next_leaf(root, &path);
8434                         if (ret != 0)
8435                                 break;
8436                         leaf = path.nodes[0];
8437                         slot = path.slots[0];
8438                 }
8439                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8440                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8441                         unsigned long offset;
8442                         u64 last_snapshot;
8443
8444                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8445                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8446                         last_snapshot = btrfs_root_last_snapshot(&ri);
8447                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8448                                 level = btrfs_root_level(&ri);
8449                                 level_size = root->nodesize;
8450                                 ret = add_root_item_to_list(&normal_trees,
8451                                                 found_key.objectid,
8452                                                 btrfs_root_bytenr(&ri),
8453                                                 last_snapshot, level,
8454                                                 0, level_size, NULL);
8455                                 if (ret < 0)
8456                                         goto out;
8457                         } else {
8458                                 level = btrfs_root_level(&ri);
8459                                 level_size = root->nodesize;
8460                                 objectid = found_key.objectid;
8461                                 btrfs_disk_key_to_cpu(&found_key,
8462                                                       &ri.drop_progress);
8463                                 ret = add_root_item_to_list(&dropping_trees,
8464                                                 objectid,
8465                                                 btrfs_root_bytenr(&ri),
8466                                                 last_snapshot, level,
8467                                                 ri.drop_level,
8468                                                 level_size, &found_key);
8469                                 if (ret < 0)
8470                                         goto out;
8471                         }
8472                 }
8473                 path.slots[0]++;
8474         }
8475         btrfs_release_path(&path);
8476
8477         /*
8478          * check_block can return -EAGAIN if it fixes something, please keep
8479          * this in mind when dealing with return values from these functions, if
8480          * we get -EAGAIN we want to fall through and restart the loop.
8481          */
8482         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8483                                   &seen, &reada, &nodes, &extent_cache,
8484                                   &chunk_cache, &dev_cache, &block_group_cache,
8485                                   &dev_extent_cache);
8486         if (ret < 0) {
8487                 if (ret == -EAGAIN)
8488                         goto loop;
8489                 goto out;
8490         }
8491         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8492                                   &pending, &seen, &reada, &nodes,
8493                                   &extent_cache, &chunk_cache, &dev_cache,
8494                                   &block_group_cache, &dev_extent_cache);
8495         if (ret < 0) {
8496                 if (ret == -EAGAIN)
8497                         goto loop;
8498                 goto out;
8499         }
8500
8501         ret = check_chunks(&chunk_cache, &block_group_cache,
8502                            &dev_extent_cache, NULL, NULL, NULL, 0);
8503         if (ret) {
8504                 if (ret == -EAGAIN)
8505                         goto loop;
8506                 err = ret;
8507         }
8508
8509         ret = check_extent_refs(root, &extent_cache);
8510         if (ret < 0) {
8511                 if (ret == -EAGAIN)
8512                         goto loop;
8513                 goto out;
8514         }
8515
8516         ret = check_devices(&dev_cache, &dev_extent_cache);
8517         if (ret && err)
8518                 ret = err;
8519
8520 out:
8521         task_stop(ctx.info);
8522         if (repair) {
8523                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8524                 extent_io_tree_cleanup(&excluded_extents);
8525                 root->fs_info->fsck_extent_cache = NULL;
8526                 root->fs_info->free_extent_hook = NULL;
8527                 root->fs_info->corrupt_blocks = NULL;
8528                 root->fs_info->excluded_extents = NULL;
8529         }
8530         free(bits);
8531         free_chunk_cache_tree(&chunk_cache);
8532         free_device_cache_tree(&dev_cache);
8533         free_block_group_tree(&block_group_cache);
8534         free_device_extent_tree(&dev_extent_cache);
8535         free_extent_cache_tree(&seen);
8536         free_extent_cache_tree(&pending);
8537         free_extent_cache_tree(&reada);
8538         free_extent_cache_tree(&nodes);
8539         return ret;
8540 loop:
8541         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8542         free_extent_cache_tree(&seen);
8543         free_extent_cache_tree(&pending);
8544         free_extent_cache_tree(&reada);
8545         free_extent_cache_tree(&nodes);
8546         free_chunk_cache_tree(&chunk_cache);
8547         free_block_group_tree(&block_group_cache);
8548         free_device_cache_tree(&dev_cache);
8549         free_device_extent_tree(&dev_extent_cache);
8550         free_extent_record_cache(root->fs_info, &extent_cache);
8551         free_root_item_list(&normal_trees);
8552         free_root_item_list(&dropping_trees);
8553         extent_io_tree_cleanup(&excluded_extents);
8554         goto again;
8555 }
8556
8557 /*
8558  * Check backrefs of a tree block given by @bytenr or @eb.
8559  *
8560  * @root:       the root containing the @bytenr or @eb
8561  * @eb:         tree block extent buffer, can be NULL
8562  * @bytenr:     bytenr of the tree block to search
8563  * @level:      tree level of the tree block
8564  * @owner:      owner of the tree block
8565  *
8566  * Return >0 for any error found and output error message
8567  * Return 0 for no error found
8568  */
8569 static int check_tree_block_ref(struct btrfs_root *root,
8570                                 struct extent_buffer *eb, u64 bytenr,
8571                                 int level, u64 owner)
8572 {
8573         struct btrfs_key key;
8574         struct btrfs_root *extent_root = root->fs_info->extent_root;
8575         struct btrfs_path path;
8576         struct btrfs_extent_item *ei;
8577         struct btrfs_extent_inline_ref *iref;
8578         struct extent_buffer *leaf;
8579         unsigned long end;
8580         unsigned long ptr;
8581         int slot;
8582         int skinny_level;
8583         int type;
8584         u32 nodesize = root->nodesize;
8585         u32 item_size;
8586         u64 offset;
8587         int found_ref = 0;
8588         int err = 0;
8589         int ret;
8590
8591         btrfs_init_path(&path);
8592         key.objectid = bytenr;
8593         if (btrfs_fs_incompat(root->fs_info,
8594                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8595                 key.type = BTRFS_METADATA_ITEM_KEY;
8596         else
8597                 key.type = BTRFS_EXTENT_ITEM_KEY;
8598         key.offset = (u64)-1;
8599
8600         /* Search for the backref in extent tree */
8601         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8602         if (ret < 0) {
8603                 err |= BACKREF_MISSING;
8604                 goto out;
8605         }
8606         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8607         if (ret) {
8608                 err |= BACKREF_MISSING;
8609                 goto out;
8610         }
8611
8612         leaf = path.nodes[0];
8613         slot = path.slots[0];
8614         btrfs_item_key_to_cpu(leaf, &key, slot);
8615
8616         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8617
8618         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8619                 skinny_level = (int)key.offset;
8620                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8621         } else {
8622                 struct btrfs_tree_block_info *info;
8623
8624                 info = (struct btrfs_tree_block_info *)(ei + 1);
8625                 skinny_level = btrfs_tree_block_level(leaf, info);
8626                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8627         }
8628
8629         if (eb) {
8630                 u64 header_gen;
8631                 u64 extent_gen;
8632
8633                 if (!(btrfs_extent_flags(leaf, ei) &
8634                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8635                         error(
8636                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8637                                 key.objectid, nodesize,
8638                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8639                         err = BACKREF_MISMATCH;
8640                 }
8641                 header_gen = btrfs_header_generation(eb);
8642                 extent_gen = btrfs_extent_generation(leaf, ei);
8643                 if (header_gen != extent_gen) {
8644                         error(
8645         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8646                                 key.objectid, nodesize, header_gen,
8647                                 extent_gen);
8648                         err = BACKREF_MISMATCH;
8649                 }
8650                 if (level != skinny_level) {
8651                         error(
8652                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8653                                 key.objectid, nodesize, level, skinny_level);
8654                         err = BACKREF_MISMATCH;
8655                 }
8656                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8657                         error(
8658                         "extent[%llu %u] is referred by other roots than %llu",
8659                                 key.objectid, nodesize, root->objectid);
8660                         err = BACKREF_MISMATCH;
8661                 }
8662         }
8663
8664         /*
8665          * Iterate the extent/metadata item to find the exact backref
8666          */
8667         item_size = btrfs_item_size_nr(leaf, slot);
8668         ptr = (unsigned long)iref;
8669         end = (unsigned long)ei + item_size;
8670         while (ptr < end) {
8671                 iref = (struct btrfs_extent_inline_ref *)ptr;
8672                 type = btrfs_extent_inline_ref_type(leaf, iref);
8673                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8674
8675                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8676                         (offset == root->objectid || offset == owner)) {
8677                         found_ref = 1;
8678                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8679                         /* Check if the backref points to valid referencer */
8680                         found_ref = !check_tree_block_ref(root, NULL, offset,
8681                                                           level + 1, owner);
8682                 }
8683
8684                 if (found_ref)
8685                         break;
8686                 ptr += btrfs_extent_inline_ref_size(type);
8687         }
8688
8689         /*
8690          * Inlined extent item doesn't have what we need, check
8691          * TREE_BLOCK_REF_KEY
8692          */
8693         if (!found_ref) {
8694                 btrfs_release_path(&path);
8695                 key.objectid = bytenr;
8696                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8697                 key.offset = root->objectid;
8698
8699                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8700                 if (!ret)
8701                         found_ref = 1;
8702         }
8703         if (!found_ref)
8704                 err |= BACKREF_MISSING;
8705 out:
8706         btrfs_release_path(&path);
8707         if (eb && (err & BACKREF_MISSING))
8708                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8709                         bytenr, nodesize, owner, level);
8710         return err;
8711 }
8712
8713 /*
8714  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8715  *
8716  * Return >0 any error found and output error message
8717  * Return 0 for no error found
8718  */
8719 static int check_extent_data_item(struct btrfs_root *root,
8720                                   struct extent_buffer *eb, int slot)
8721 {
8722         struct btrfs_file_extent_item *fi;
8723         struct btrfs_path path;
8724         struct btrfs_root *extent_root = root->fs_info->extent_root;
8725         struct btrfs_key fi_key;
8726         struct btrfs_key dbref_key;
8727         struct extent_buffer *leaf;
8728         struct btrfs_extent_item *ei;
8729         struct btrfs_extent_inline_ref *iref;
8730         struct btrfs_extent_data_ref *dref;
8731         u64 owner;
8732         u64 file_extent_gen;
8733         u64 disk_bytenr;
8734         u64 disk_num_bytes;
8735         u64 extent_num_bytes;
8736         u64 extent_flags;
8737         u64 extent_gen;
8738         u32 item_size;
8739         unsigned long end;
8740         unsigned long ptr;
8741         int type;
8742         u64 ref_root;
8743         int found_dbackref = 0;
8744         int err = 0;
8745         int ret;
8746
8747         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8748         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8749         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8750
8751         /* Nothing to check for hole and inline data extents */
8752         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8753             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8754                 return 0;
8755
8756         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8757         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8758         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8759
8760         /* Check unaligned disk_num_bytes and num_bytes */
8761         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8762                 error(
8763 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8764                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8765                         root->sectorsize);
8766                 err |= BYTES_UNALIGNED;
8767         } else {
8768                 data_bytes_allocated += disk_num_bytes;
8769         }
8770         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8771                 error(
8772 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8773                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8774                         root->sectorsize);
8775                 err |= BYTES_UNALIGNED;
8776         } else {
8777                 data_bytes_referenced += extent_num_bytes;
8778         }
8779         owner = btrfs_header_owner(eb);
8780
8781         /* Check the extent item of the file extent in extent tree */
8782         btrfs_init_path(&path);
8783         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8784         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8785         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8786
8787         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8788         if (ret) {
8789                 err |= BACKREF_MISSING;
8790                 goto error;
8791         }
8792
8793         leaf = path.nodes[0];
8794         slot = path.slots[0];
8795         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8796
8797         extent_flags = btrfs_extent_flags(leaf, ei);
8798         extent_gen = btrfs_extent_generation(leaf, ei);
8799
8800         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8801                 error(
8802                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8803                     disk_bytenr, disk_num_bytes,
8804                     BTRFS_EXTENT_FLAG_DATA);
8805                 err |= BACKREF_MISMATCH;
8806         }
8807
8808         if (file_extent_gen < extent_gen) {
8809                 error(
8810 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8811                         disk_bytenr, disk_num_bytes, file_extent_gen,
8812                         extent_gen);
8813                 err |= BACKREF_MISMATCH;
8814         }
8815
8816         /* Check data backref inside that extent item */
8817         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8818         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8819         ptr = (unsigned long)iref;
8820         end = (unsigned long)ei + item_size;
8821         while (ptr < end) {
8822                 iref = (struct btrfs_extent_inline_ref *)ptr;
8823                 type = btrfs_extent_inline_ref_type(leaf, iref);
8824                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8825
8826                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8827                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8828                         if (ref_root == owner || ref_root == root->objectid)
8829                                 found_dbackref = 1;
8830                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8831                         found_dbackref = !check_tree_block_ref(root, NULL,
8832                                 btrfs_extent_inline_ref_offset(leaf, iref),
8833                                 0, owner);
8834                 }
8835
8836                 if (found_dbackref)
8837                         break;
8838                 ptr += btrfs_extent_inline_ref_size(type);
8839         }
8840
8841         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8842         if (!found_dbackref) {
8843                 btrfs_release_path(&path);
8844
8845                 btrfs_init_path(&path);
8846                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8847                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8848                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8849                                 fi_key.objectid, fi_key.offset);
8850
8851                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8852                                         &dbref_key, &path, 0, 0);
8853                 if (!ret)
8854                         found_dbackref = 1;
8855         }
8856
8857         if (!found_dbackref)
8858                 err |= BACKREF_MISSING;
8859 error:
8860         btrfs_release_path(&path);
8861         if (err & BACKREF_MISSING) {
8862                 error("data extent[%llu %llu] backref lost",
8863                       disk_bytenr, disk_num_bytes);
8864         }
8865         return err;
8866 }
8867
8868 /*
8869  * Get real tree block level for the case like shared block
8870  * Return >= 0 as tree level
8871  * Return <0 for error
8872  */
8873 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8874 {
8875         struct extent_buffer *eb;
8876         struct btrfs_path path;
8877         struct btrfs_key key;
8878         struct btrfs_extent_item *ei;
8879         u64 flags;
8880         u64 transid;
8881         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8882         u8 backref_level;
8883         u8 header_level;
8884         int ret;
8885
8886         /* Search extent tree for extent generation and level */
8887         key.objectid = bytenr;
8888         key.type = BTRFS_METADATA_ITEM_KEY;
8889         key.offset = (u64)-1;
8890
8891         btrfs_init_path(&path);
8892         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8893         if (ret < 0)
8894                 goto release_out;
8895         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8896         if (ret < 0)
8897                 goto release_out;
8898         if (ret > 0) {
8899                 ret = -ENOENT;
8900                 goto release_out;
8901         }
8902
8903         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8904         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8905                             struct btrfs_extent_item);
8906         flags = btrfs_extent_flags(path.nodes[0], ei);
8907         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8908                 ret = -ENOENT;
8909                 goto release_out;
8910         }
8911
8912         /* Get transid for later read_tree_block() check */
8913         transid = btrfs_extent_generation(path.nodes[0], ei);
8914
8915         /* Get backref level as one source */
8916         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8917                 backref_level = key.offset;
8918         } else {
8919                 struct btrfs_tree_block_info *info;
8920
8921                 info = (struct btrfs_tree_block_info *)(ei + 1);
8922                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8923         }
8924         btrfs_release_path(&path);
8925
8926         /* Get level from tree block as an alternative source */
8927         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8928         if (!extent_buffer_uptodate(eb)) {
8929                 free_extent_buffer(eb);
8930                 return -EIO;
8931         }
8932         header_level = btrfs_header_level(eb);
8933         free_extent_buffer(eb);
8934
8935         if (header_level != backref_level)
8936                 return -EIO;
8937         return header_level;
8938
8939 release_out:
8940         btrfs_release_path(&path);
8941         return ret;
8942 }
8943
8944 /*
8945  * Check if a tree block backref is valid (points to a valid tree block)
8946  * if level == -1, level will be resolved
8947  * Return >0 for any error found and print error message
8948  */
8949 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8950                                     u64 bytenr, int level)
8951 {
8952         struct btrfs_root *root;
8953         struct btrfs_key key;
8954         struct btrfs_path path;
8955         struct extent_buffer *eb;
8956         struct extent_buffer *node;
8957         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8958         int err = 0;
8959         int ret;
8960
8961         /* Query level for level == -1 special case */
8962         if (level == -1)
8963                 level = query_tree_block_level(fs_info, bytenr);
8964         if (level < 0) {
8965                 err |= REFERENCER_MISSING;
8966                 goto out;
8967         }
8968
8969         key.objectid = root_id;
8970         key.type = BTRFS_ROOT_ITEM_KEY;
8971         key.offset = (u64)-1;
8972
8973         root = btrfs_read_fs_root(fs_info, &key);
8974         if (IS_ERR(root)) {
8975                 err |= REFERENCER_MISSING;
8976                 goto out;
8977         }
8978
8979         /* Read out the tree block to get item/node key */
8980         eb = read_tree_block(root, bytenr, root->nodesize, 0);
8981         if (!extent_buffer_uptodate(eb)) {
8982                 err |= REFERENCER_MISSING;
8983                 free_extent_buffer(eb);
8984                 goto out;
8985         }
8986
8987         /* Empty tree, no need to check key */
8988         if (!btrfs_header_nritems(eb) && !level) {
8989                 free_extent_buffer(eb);
8990                 goto out;
8991         }
8992
8993         if (level)
8994                 btrfs_node_key_to_cpu(eb, &key, 0);
8995         else
8996                 btrfs_item_key_to_cpu(eb, &key, 0);
8997
8998         free_extent_buffer(eb);
8999
9000         btrfs_init_path(&path);
9001         /* Search with the first key, to ensure we can reach it */
9002         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9003         if (ret) {
9004                 err |= REFERENCER_MISSING;
9005                 goto release_out;
9006         }
9007
9008         node = path.nodes[level];
9009         if (btrfs_header_bytenr(node) != bytenr) {
9010                 error(
9011         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9012                         bytenr, nodesize, bytenr,
9013                         btrfs_header_bytenr(node));
9014                 err |= REFERENCER_MISMATCH;
9015         }
9016         if (btrfs_header_level(node) != level) {
9017                 error(
9018         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9019                         bytenr, nodesize, level,
9020                         btrfs_header_level(node));
9021                 err |= REFERENCER_MISMATCH;
9022         }
9023
9024 release_out:
9025         btrfs_release_path(&path);
9026 out:
9027         if (err & REFERENCER_MISSING) {
9028                 if (level < 0)
9029                         error("extent [%llu %d] lost referencer (owner: %llu)",
9030                                 bytenr, nodesize, root_id);
9031                 else
9032                         error(
9033                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9034                                 bytenr, nodesize, root_id, level);
9035         }
9036
9037         return err;
9038 }
9039
9040 /*
9041  * Check referencer for shared block backref
9042  * If level == -1, this function will resolve the level.
9043  */
9044 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9045                                      u64 parent, u64 bytenr, int level)
9046 {
9047         struct extent_buffer *eb;
9048         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9049         u32 nr;
9050         int found_parent = 0;
9051         int i;
9052
9053         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9054         if (!extent_buffer_uptodate(eb))
9055                 goto out;
9056
9057         if (level == -1)
9058                 level = query_tree_block_level(fs_info, bytenr);
9059         if (level < 0)
9060                 goto out;
9061
9062         if (level + 1 != btrfs_header_level(eb))
9063                 goto out;
9064
9065         nr = btrfs_header_nritems(eb);
9066         for (i = 0; i < nr; i++) {
9067                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9068                         found_parent = 1;
9069                         break;
9070                 }
9071         }
9072 out:
9073         free_extent_buffer(eb);
9074         if (!found_parent) {
9075                 error(
9076         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9077                         bytenr, nodesize, parent, level);
9078                 return REFERENCER_MISSING;
9079         }
9080         return 0;
9081 }
9082
9083 /*
9084  * Check referencer for normal (inlined) data ref
9085  * If len == 0, it will be resolved by searching in extent tree
9086  */
9087 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9088                                      u64 root_id, u64 objectid, u64 offset,
9089                                      u64 bytenr, u64 len, u32 count)
9090 {
9091         struct btrfs_root *root;
9092         struct btrfs_root *extent_root = fs_info->extent_root;
9093         struct btrfs_key key;
9094         struct btrfs_path path;
9095         struct extent_buffer *leaf;
9096         struct btrfs_file_extent_item *fi;
9097         u32 found_count = 0;
9098         int slot;
9099         int ret = 0;
9100
9101         if (!len) {
9102                 key.objectid = bytenr;
9103                 key.type = BTRFS_EXTENT_ITEM_KEY;
9104                 key.offset = (u64)-1;
9105
9106                 btrfs_init_path(&path);
9107                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9108                 if (ret < 0)
9109                         goto out;
9110                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9111                 if (ret)
9112                         goto out;
9113                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9114                 if (key.objectid != bytenr ||
9115                     key.type != BTRFS_EXTENT_ITEM_KEY)
9116                         goto out;
9117                 len = key.offset;
9118                 btrfs_release_path(&path);
9119         }
9120         key.objectid = root_id;
9121         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9122         key.offset = (u64)-1;
9123         btrfs_init_path(&path);
9124
9125         root = btrfs_read_fs_root(fs_info, &key);
9126         if (IS_ERR(root))
9127                 goto out;
9128
9129         key.objectid = objectid;
9130         key.type = BTRFS_EXTENT_DATA_KEY;
9131         /*
9132          * It can be nasty as data backref offset is
9133          * file offset - file extent offset, which is smaller or
9134          * equal to original backref offset.  The only special case is
9135          * overflow.  So we need to special check and do further search.
9136          */
9137         key.offset = offset & (1ULL << 63) ? 0 : offset;
9138
9139         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9140         if (ret < 0)
9141                 goto out;
9142
9143         /*
9144          * Search afterwards to get correct one
9145          * NOTE: As we must do a comprehensive check on the data backref to
9146          * make sure the dref count also matches, we must iterate all file
9147          * extents for that inode.
9148          */
9149         while (1) {
9150                 leaf = path.nodes[0];
9151                 slot = path.slots[0];
9152
9153                 btrfs_item_key_to_cpu(leaf, &key, slot);
9154                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9155                         break;
9156                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9157                 /*
9158                  * Except normal disk bytenr and disk num bytes, we still
9159                  * need to do extra check on dbackref offset as
9160                  * dbackref offset = file_offset - file_extent_offset
9161                  */
9162                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9163                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9164                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9165                     offset)
9166                         found_count++;
9167
9168                 ret = btrfs_next_item(root, &path);
9169                 if (ret)
9170                         break;
9171         }
9172 out:
9173         btrfs_release_path(&path);
9174         if (found_count != count) {
9175                 error(
9176 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9177                         bytenr, len, root_id, objectid, offset, count, found_count);
9178                 return REFERENCER_MISSING;
9179         }
9180         return 0;
9181 }
9182
9183 /*
9184  * Check if the referencer of a shared data backref exists
9185  */
9186 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9187                                      u64 parent, u64 bytenr)
9188 {
9189         struct extent_buffer *eb;
9190         struct btrfs_key key;
9191         struct btrfs_file_extent_item *fi;
9192         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9193         u32 nr;
9194         int found_parent = 0;
9195         int i;
9196
9197         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9198         if (!extent_buffer_uptodate(eb))
9199                 goto out;
9200
9201         nr = btrfs_header_nritems(eb);
9202         for (i = 0; i < nr; i++) {
9203                 btrfs_item_key_to_cpu(eb, &key, i);
9204                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9205                         continue;
9206
9207                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9208                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9209                         continue;
9210
9211                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9212                         found_parent = 1;
9213                         break;
9214                 }
9215         }
9216
9217 out:
9218         free_extent_buffer(eb);
9219         if (!found_parent) {
9220                 error("shared extent %llu referencer lost (parent: %llu)",
9221                         bytenr, parent);
9222                 return REFERENCER_MISSING;
9223         }
9224         return 0;
9225 }
9226
9227 /*
9228  * This function will check a given extent item, including its backref and
9229  * itself (like crossing stripe boundary and type)
9230  *
9231  * Since we don't use extent_record anymore, introduce new error bit
9232  */
9233 static int check_extent_item(struct btrfs_fs_info *fs_info,
9234                              struct extent_buffer *eb, int slot)
9235 {
9236         struct btrfs_extent_item *ei;
9237         struct btrfs_extent_inline_ref *iref;
9238         struct btrfs_extent_data_ref *dref;
9239         unsigned long end;
9240         unsigned long ptr;
9241         int type;
9242         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9243         u32 item_size = btrfs_item_size_nr(eb, slot);
9244         u64 flags;
9245         u64 offset;
9246         int metadata = 0;
9247         int level;
9248         struct btrfs_key key;
9249         int ret;
9250         int err = 0;
9251
9252         btrfs_item_key_to_cpu(eb, &key, slot);
9253         if (key.type == BTRFS_EXTENT_ITEM_KEY)
9254                 bytes_used += key.offset;
9255         else
9256                 bytes_used += nodesize;
9257
9258         if (item_size < sizeof(*ei)) {
9259                 /*
9260                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9261                  * old thing when on disk format is still un-determined.
9262                  * No need to care about it anymore
9263                  */
9264                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9265                 return -ENOTTY;
9266         }
9267
9268         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9269         flags = btrfs_extent_flags(eb, ei);
9270
9271         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9272                 metadata = 1;
9273         if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9274                 error("bad metadata [%llu, %llu) crossing stripe boundary",
9275                       key.objectid, key.objectid + nodesize);
9276                 err |= CROSSING_STRIPE_BOUNDARY;
9277         }
9278
9279         ptr = (unsigned long)(ei + 1);
9280
9281         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9282                 /* Old EXTENT_ITEM metadata */
9283                 struct btrfs_tree_block_info *info;
9284
9285                 info = (struct btrfs_tree_block_info *)ptr;
9286                 level = btrfs_tree_block_level(eb, info);
9287                 ptr += sizeof(struct btrfs_tree_block_info);
9288         } else {
9289                 /* New METADATA_ITEM */
9290                 level = key.offset;
9291         }
9292         end = (unsigned long)ei + item_size;
9293
9294         if (ptr >= end) {
9295                 err |= ITEM_SIZE_MISMATCH;
9296                 goto out;
9297         }
9298
9299         /* Now check every backref in this extent item */
9300 next:
9301         iref = (struct btrfs_extent_inline_ref *)ptr;
9302         type = btrfs_extent_inline_ref_type(eb, iref);
9303         offset = btrfs_extent_inline_ref_offset(eb, iref);
9304         switch (type) {
9305         case BTRFS_TREE_BLOCK_REF_KEY:
9306                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9307                                                level);
9308                 err |= ret;
9309                 break;
9310         case BTRFS_SHARED_BLOCK_REF_KEY:
9311                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9312                                                  level);
9313                 err |= ret;
9314                 break;
9315         case BTRFS_EXTENT_DATA_REF_KEY:
9316                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9317                 ret = check_extent_data_backref(fs_info,
9318                                 btrfs_extent_data_ref_root(eb, dref),
9319                                 btrfs_extent_data_ref_objectid(eb, dref),
9320                                 btrfs_extent_data_ref_offset(eb, dref),
9321                                 key.objectid, key.offset,
9322                                 btrfs_extent_data_ref_count(eb, dref));
9323                 err |= ret;
9324                 break;
9325         case BTRFS_SHARED_DATA_REF_KEY:
9326                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9327                 err |= ret;
9328                 break;
9329         default:
9330                 error("extent[%llu %d %llu] has unknown ref type: %d",
9331                         key.objectid, key.type, key.offset, type);
9332                 err |= UNKNOWN_TYPE;
9333                 goto out;
9334         }
9335
9336         ptr += btrfs_extent_inline_ref_size(type);
9337         if (ptr < end)
9338                 goto next;
9339
9340 out:
9341         return err;
9342 }
9343
9344 /*
9345  * Check if a dev extent item is referred correctly by its chunk
9346  */
9347 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9348                                  struct extent_buffer *eb, int slot)
9349 {
9350         struct btrfs_root *chunk_root = fs_info->chunk_root;
9351         struct btrfs_dev_extent *ptr;
9352         struct btrfs_path path;
9353         struct btrfs_key chunk_key;
9354         struct btrfs_key devext_key;
9355         struct btrfs_chunk *chunk;
9356         struct extent_buffer *l;
9357         int num_stripes;
9358         u64 length;
9359         int i;
9360         int found_chunk = 0;
9361         int ret;
9362
9363         btrfs_item_key_to_cpu(eb, &devext_key, slot);
9364         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9365         length = btrfs_dev_extent_length(eb, ptr);
9366
9367         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9368         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9369         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9370
9371         btrfs_init_path(&path);
9372         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9373         if (ret)
9374                 goto out;
9375
9376         l = path.nodes[0];
9377         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9378         if (btrfs_chunk_length(l, chunk) != length)
9379                 goto out;
9380
9381         num_stripes = btrfs_chunk_num_stripes(l, chunk);
9382         for (i = 0; i < num_stripes; i++) {
9383                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9384                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9385
9386                 if (devid == devext_key.objectid &&
9387                     offset == devext_key.offset) {
9388                         found_chunk = 1;
9389                         break;
9390                 }
9391         }
9392 out:
9393         btrfs_release_path(&path);
9394         if (!found_chunk) {
9395                 error(
9396                 "device extent[%llu, %llu, %llu] did not find the related chunk",
9397                         devext_key.objectid, devext_key.offset, length);
9398                 return REFERENCER_MISSING;
9399         }
9400         return 0;
9401 }
9402
9403 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
9404                            struct btrfs_root *root, int overwrite)
9405 {
9406         struct extent_buffer *c;
9407         struct extent_buffer *old = root->node;
9408         int level;
9409         int ret;
9410         struct btrfs_disk_key disk_key = {0,0,0};
9411
9412         level = 0;
9413
9414         if (overwrite) {
9415                 c = old;
9416                 extent_buffer_get(c);
9417                 goto init;
9418         }
9419         c = btrfs_alloc_free_block(trans, root,
9420                                    root->nodesize,
9421                                    root->root_key.objectid,
9422                                    &disk_key, level, 0, 0);
9423         if (IS_ERR(c)) {
9424                 c = old;
9425                 extent_buffer_get(c);
9426                 overwrite = 1;
9427         }
9428 init:
9429         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
9430         btrfs_set_header_level(c, level);
9431         btrfs_set_header_bytenr(c, c->start);
9432         btrfs_set_header_generation(c, trans->transid);
9433         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
9434         btrfs_set_header_owner(c, root->root_key.objectid);
9435
9436         write_extent_buffer(c, root->fs_info->fsid,
9437                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
9438
9439         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
9440                             btrfs_header_chunk_tree_uuid(c),
9441                             BTRFS_UUID_SIZE);
9442
9443         btrfs_mark_buffer_dirty(c);
9444         /*
9445          * this case can happen in the following case:
9446          *
9447          * 1.overwrite previous root.
9448          *
9449          * 2.reinit reloc data root, this is because we skip pin
9450          * down reloc data tree before which means we can allocate
9451          * same block bytenr here.
9452          */
9453         if (old->start == c->start) {
9454                 btrfs_set_root_generation(&root->root_item,
9455                                           trans->transid);
9456                 root->root_item.level = btrfs_header_level(root->node);
9457                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
9458                                         &root->root_key, &root->root_item);
9459                 if (ret) {
9460                         free_extent_buffer(c);
9461                         return ret;
9462                 }
9463         }
9464         free_extent_buffer(old);
9465         root->node = c;
9466         add_root_to_dirty_list(root);
9467         return 0;
9468 }
9469
9470 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
9471                                 struct extent_buffer *eb, int tree_root)
9472 {
9473         struct extent_buffer *tmp;
9474         struct btrfs_root_item *ri;
9475         struct btrfs_key key;
9476         u64 bytenr;
9477         u32 nodesize;
9478         int level = btrfs_header_level(eb);
9479         int nritems;
9480         int ret;
9481         int i;
9482
9483         /*
9484          * If we have pinned this block before, don't pin it again.
9485          * This can not only avoid forever loop with broken filesystem
9486          * but also give us some speedups.
9487          */
9488         if (test_range_bit(&fs_info->pinned_extents, eb->start,
9489                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
9490                 return 0;
9491
9492         btrfs_pin_extent(fs_info, eb->start, eb->len);
9493
9494         nodesize = btrfs_super_nodesize(fs_info->super_copy);
9495         nritems = btrfs_header_nritems(eb);
9496         for (i = 0; i < nritems; i++) {
9497                 if (level == 0) {
9498                         btrfs_item_key_to_cpu(eb, &key, i);
9499                         if (key.type != BTRFS_ROOT_ITEM_KEY)
9500                                 continue;
9501                         /* Skip the extent root and reloc roots */
9502                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
9503                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
9504                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
9505                                 continue;
9506                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
9507                         bytenr = btrfs_disk_root_bytenr(eb, ri);
9508
9509                         /*
9510                          * If at any point we start needing the real root we
9511                          * will have to build a stump root for the root we are
9512                          * in, but for now this doesn't actually use the root so
9513                          * just pass in extent_root.
9514                          */
9515                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9516                                               nodesize, 0);
9517                         if (!extent_buffer_uptodate(tmp)) {
9518                                 fprintf(stderr, "Error reading root block\n");
9519                                 return -EIO;
9520                         }
9521                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
9522                         free_extent_buffer(tmp);
9523                         if (ret)
9524                                 return ret;
9525                 } else {
9526                         bytenr = btrfs_node_blockptr(eb, i);
9527
9528                         /* If we aren't the tree root don't read the block */
9529                         if (level == 1 && !tree_root) {
9530                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
9531                                 continue;
9532                         }
9533
9534                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9535                                               nodesize, 0);
9536                         if (!extent_buffer_uptodate(tmp)) {
9537                                 fprintf(stderr, "Error reading tree block\n");
9538                                 return -EIO;
9539                         }
9540                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
9541                         free_extent_buffer(tmp);
9542                         if (ret)
9543                                 return ret;
9544                 }
9545         }
9546
9547         return 0;
9548 }
9549
9550 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9551 {
9552         int ret;
9553
9554         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9555         if (ret)
9556                 return ret;
9557
9558         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
9559 }
9560
9561 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9562 {
9563         struct btrfs_block_group_cache *cache;
9564         struct btrfs_path *path;
9565         struct extent_buffer *leaf;
9566         struct btrfs_chunk *chunk;
9567         struct btrfs_key key;
9568         int ret;
9569         u64 start;
9570
9571         path = btrfs_alloc_path();
9572         if (!path)
9573                 return -ENOMEM;
9574
9575         key.objectid = 0;
9576         key.type = BTRFS_CHUNK_ITEM_KEY;
9577         key.offset = 0;
9578
9579         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9580         if (ret < 0) {
9581                 btrfs_free_path(path);
9582                 return ret;
9583         }
9584
9585         /*
9586          * We do this in case the block groups were screwed up and had alloc
9587          * bits that aren't actually set on the chunks.  This happens with
9588          * restored images every time and could happen in real life I guess.
9589          */
9590         fs_info->avail_data_alloc_bits = 0;
9591         fs_info->avail_metadata_alloc_bits = 0;
9592         fs_info->avail_system_alloc_bits = 0;
9593
9594         /* First we need to create the in-memory block groups */
9595         while (1) {
9596                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9597                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
9598                         if (ret < 0) {
9599                                 btrfs_free_path(path);
9600                                 return ret;
9601                         }
9602                         if (ret) {
9603                                 ret = 0;
9604                                 break;
9605                         }
9606                 }
9607                 leaf = path->nodes[0];
9608                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9609                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
9610                         path->slots[0]++;
9611                         continue;
9612                 }
9613
9614                 chunk = btrfs_item_ptr(leaf, path->slots[0],
9615                                        struct btrfs_chunk);
9616                 btrfs_add_block_group(fs_info, 0,
9617                                       btrfs_chunk_type(leaf, chunk),
9618                                       key.objectid, key.offset,
9619                                       btrfs_chunk_length(leaf, chunk));
9620                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
9621                                  key.offset + btrfs_chunk_length(leaf, chunk),
9622                                  GFP_NOFS);
9623                 path->slots[0]++;
9624         }
9625         start = 0;
9626         while (1) {
9627                 cache = btrfs_lookup_first_block_group(fs_info, start);
9628                 if (!cache)
9629                         break;
9630                 cache->cached = 1;
9631                 start = cache->key.objectid + cache->key.offset;
9632         }
9633
9634         btrfs_free_path(path);
9635         return 0;
9636 }
9637
9638 static int reset_balance(struct btrfs_trans_handle *trans,
9639                          struct btrfs_fs_info *fs_info)
9640 {
9641         struct btrfs_root *root = fs_info->tree_root;
9642         struct btrfs_path *path;
9643         struct extent_buffer *leaf;
9644         struct btrfs_key key;
9645         int del_slot, del_nr = 0;
9646         int ret;
9647         int found = 0;
9648
9649         path = btrfs_alloc_path();
9650         if (!path)
9651                 return -ENOMEM;
9652
9653         key.objectid = BTRFS_BALANCE_OBJECTID;
9654         key.type = BTRFS_BALANCE_ITEM_KEY;
9655         key.offset = 0;
9656
9657         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9658         if (ret) {
9659                 if (ret > 0)
9660                         ret = 0;
9661                 if (!ret)
9662                         goto reinit_data_reloc;
9663                 else
9664                         goto out;
9665         }
9666
9667         ret = btrfs_del_item(trans, root, path);
9668         if (ret)
9669                 goto out;
9670         btrfs_release_path(path);
9671
9672         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9673         key.type = BTRFS_ROOT_ITEM_KEY;
9674         key.offset = 0;
9675
9676         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9677         if (ret < 0)
9678                 goto out;
9679         while (1) {
9680                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9681                         if (!found)
9682                                 break;
9683
9684                         if (del_nr) {
9685                                 ret = btrfs_del_items(trans, root, path,
9686                                                       del_slot, del_nr);
9687                                 del_nr = 0;
9688                                 if (ret)
9689                                         goto out;
9690                         }
9691                         key.offset++;
9692                         btrfs_release_path(path);
9693
9694                         found = 0;
9695                         ret = btrfs_search_slot(trans, root, &key, path,
9696                                                 -1, 1);
9697                         if (ret < 0)
9698                                 goto out;
9699                         continue;
9700                 }
9701                 found = 1;
9702                 leaf = path->nodes[0];
9703                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9704                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9705                         break;
9706                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9707                         path->slots[0]++;
9708                         continue;
9709                 }
9710                 if (!del_nr) {
9711                         del_slot = path->slots[0];
9712                         del_nr = 1;
9713                 } else {
9714                         del_nr++;
9715                 }
9716                 path->slots[0]++;
9717         }
9718
9719         if (del_nr) {
9720                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9721                 if (ret)
9722                         goto out;
9723         }
9724         btrfs_release_path(path);
9725
9726 reinit_data_reloc:
9727         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9728         key.type = BTRFS_ROOT_ITEM_KEY;
9729         key.offset = (u64)-1;
9730         root = btrfs_read_fs_root(fs_info, &key);
9731         if (IS_ERR(root)) {
9732                 fprintf(stderr, "Error reading data reloc tree\n");
9733                 ret = PTR_ERR(root);
9734                 goto out;
9735         }
9736         record_root_in_trans(trans, root);
9737         ret = btrfs_fsck_reinit_root(trans, root, 0);
9738         if (ret)
9739                 goto out;
9740         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9741 out:
9742         btrfs_free_path(path);
9743         return ret;
9744 }
9745
9746 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9747                               struct btrfs_fs_info *fs_info)
9748 {
9749         u64 start = 0;
9750         int ret;
9751
9752         /*
9753          * The only reason we don't do this is because right now we're just
9754          * walking the trees we find and pinning down their bytes, we don't look
9755          * at any of the leaves.  In order to do mixed groups we'd have to check
9756          * the leaves of any fs roots and pin down the bytes for any file
9757          * extents we find.  Not hard but why do it if we don't have to?
9758          */
9759         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9760                 fprintf(stderr, "We don't support re-initing the extent tree "
9761                         "for mixed block groups yet, please notify a btrfs "
9762                         "developer you want to do this so they can add this "
9763                         "functionality.\n");
9764                 return -EINVAL;
9765         }
9766
9767         /*
9768          * first we need to walk all of the trees except the extent tree and pin
9769          * down the bytes that are in use so we don't overwrite any existing
9770          * metadata.
9771          */
9772         ret = pin_metadata_blocks(fs_info);
9773         if (ret) {
9774                 fprintf(stderr, "error pinning down used bytes\n");
9775                 return ret;
9776         }
9777
9778         /*
9779          * Need to drop all the block groups since we're going to recreate all
9780          * of them again.
9781          */
9782         btrfs_free_block_groups(fs_info);
9783         ret = reset_block_groups(fs_info);
9784         if (ret) {
9785                 fprintf(stderr, "error resetting the block groups\n");
9786                 return ret;
9787         }
9788
9789         /* Ok we can allocate now, reinit the extent root */
9790         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9791         if (ret) {
9792                 fprintf(stderr, "extent root initialization failed\n");
9793                 /*
9794                  * When the transaction code is updated we should end the
9795                  * transaction, but for now progs only knows about commit so
9796                  * just return an error.
9797                  */
9798                 return ret;
9799         }
9800
9801         /*
9802          * Now we have all the in-memory block groups setup so we can make
9803          * allocations properly, and the metadata we care about is safe since we
9804          * pinned all of it above.
9805          */
9806         while (1) {
9807                 struct btrfs_block_group_cache *cache;
9808
9809                 cache = btrfs_lookup_first_block_group(fs_info, start);
9810                 if (!cache)
9811                         break;
9812                 start = cache->key.objectid + cache->key.offset;
9813                 ret = btrfs_insert_item(trans, fs_info->extent_root,
9814                                         &cache->key, &cache->item,
9815                                         sizeof(cache->item));
9816                 if (ret) {
9817                         fprintf(stderr, "Error adding block group\n");
9818                         return ret;
9819                 }
9820                 btrfs_extent_post_op(trans, fs_info->extent_root);
9821         }
9822
9823         ret = reset_balance(trans, fs_info);
9824         if (ret)
9825                 fprintf(stderr, "error resetting the pending balance\n");
9826
9827         return ret;
9828 }
9829
9830 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9831 {
9832         struct btrfs_path *path;
9833         struct btrfs_trans_handle *trans;
9834         struct btrfs_key key;
9835         int ret;
9836
9837         printf("Recowing metadata block %llu\n", eb->start);
9838         key.objectid = btrfs_header_owner(eb);
9839         key.type = BTRFS_ROOT_ITEM_KEY;
9840         key.offset = (u64)-1;
9841
9842         root = btrfs_read_fs_root(root->fs_info, &key);
9843         if (IS_ERR(root)) {
9844                 fprintf(stderr, "Couldn't find owner root %llu\n",
9845                         key.objectid);
9846                 return PTR_ERR(root);
9847         }
9848
9849         path = btrfs_alloc_path();
9850         if (!path)
9851                 return -ENOMEM;
9852
9853         trans = btrfs_start_transaction(root, 1);
9854         if (IS_ERR(trans)) {
9855                 btrfs_free_path(path);
9856                 return PTR_ERR(trans);
9857         }
9858
9859         path->lowest_level = btrfs_header_level(eb);
9860         if (path->lowest_level)
9861                 btrfs_node_key_to_cpu(eb, &key, 0);
9862         else
9863                 btrfs_item_key_to_cpu(eb, &key, 0);
9864
9865         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9866         btrfs_commit_transaction(trans, root);
9867         btrfs_free_path(path);
9868         return ret;
9869 }
9870
9871 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9872 {
9873         struct btrfs_path *path;
9874         struct btrfs_trans_handle *trans;
9875         struct btrfs_key key;
9876         int ret;
9877
9878         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9879                bad->key.type, bad->key.offset);
9880         key.objectid = bad->root_id;
9881         key.type = BTRFS_ROOT_ITEM_KEY;
9882         key.offset = (u64)-1;
9883
9884         root = btrfs_read_fs_root(root->fs_info, &key);
9885         if (IS_ERR(root)) {
9886                 fprintf(stderr, "Couldn't find owner root %llu\n",
9887                         key.objectid);
9888                 return PTR_ERR(root);
9889         }
9890
9891         path = btrfs_alloc_path();
9892         if (!path)
9893                 return -ENOMEM;
9894
9895         trans = btrfs_start_transaction(root, 1);
9896         if (IS_ERR(trans)) {
9897                 btrfs_free_path(path);
9898                 return PTR_ERR(trans);
9899         }
9900
9901         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9902         if (ret) {
9903                 if (ret > 0)
9904                         ret = 0;
9905                 goto out;
9906         }
9907         ret = btrfs_del_item(trans, root, path);
9908 out:
9909         btrfs_commit_transaction(trans, root);
9910         btrfs_free_path(path);
9911         return ret;
9912 }
9913
9914 static int zero_log_tree(struct btrfs_root *root)
9915 {
9916         struct btrfs_trans_handle *trans;
9917         int ret;
9918
9919         trans = btrfs_start_transaction(root, 1);
9920         if (IS_ERR(trans)) {
9921                 ret = PTR_ERR(trans);
9922                 return ret;
9923         }
9924         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9925         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9926         ret = btrfs_commit_transaction(trans, root);
9927         return ret;
9928 }
9929
9930 static int populate_csum(struct btrfs_trans_handle *trans,
9931                          struct btrfs_root *csum_root, char *buf, u64 start,
9932                          u64 len)
9933 {
9934         u64 offset = 0;
9935         u64 sectorsize;
9936         int ret = 0;
9937
9938         while (offset < len) {
9939                 sectorsize = csum_root->sectorsize;
9940                 ret = read_extent_data(csum_root, buf, start + offset,
9941                                        &sectorsize, 0);
9942                 if (ret)
9943                         break;
9944                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9945                                             start + offset, buf, sectorsize);
9946                 if (ret)
9947                         break;
9948                 offset += sectorsize;
9949         }
9950         return ret;
9951 }
9952
9953 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9954                                       struct btrfs_root *csum_root,
9955                                       struct btrfs_root *cur_root)
9956 {
9957         struct btrfs_path *path;
9958         struct btrfs_key key;
9959         struct extent_buffer *node;
9960         struct btrfs_file_extent_item *fi;
9961         char *buf = NULL;
9962         u64 start = 0;
9963         u64 len = 0;
9964         int slot = 0;
9965         int ret = 0;
9966
9967         path = btrfs_alloc_path();
9968         if (!path)
9969                 return -ENOMEM;
9970         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9971         if (!buf) {
9972                 ret = -ENOMEM;
9973                 goto out;
9974         }
9975
9976         key.objectid = 0;
9977         key.offset = 0;
9978         key.type = 0;
9979
9980         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9981         if (ret < 0)
9982                 goto out;
9983         /* Iterate all regular file extents and fill its csum */
9984         while (1) {
9985                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9986
9987                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9988                         goto next;
9989                 node = path->nodes[0];
9990                 slot = path->slots[0];
9991                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9992                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9993                         goto next;
9994                 start = btrfs_file_extent_disk_bytenr(node, fi);
9995                 len = btrfs_file_extent_disk_num_bytes(node, fi);
9996
9997                 ret = populate_csum(trans, csum_root, buf, start, len);
9998                 if (ret == -EEXIST)
9999                         ret = 0;
10000                 if (ret < 0)
10001                         goto out;
10002 next:
10003                 /*
10004                  * TODO: if next leaf is corrupted, jump to nearest next valid
10005                  * leaf.
10006                  */
10007                 ret = btrfs_next_item(cur_root, path);
10008                 if (ret < 0)
10009                         goto out;
10010                 if (ret > 0) {
10011                         ret = 0;
10012                         goto out;
10013                 }
10014         }
10015
10016 out:
10017         btrfs_free_path(path);
10018         free(buf);
10019         return ret;
10020 }
10021
10022 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10023                                   struct btrfs_root *csum_root)
10024 {
10025         struct btrfs_fs_info *fs_info = csum_root->fs_info;
10026         struct btrfs_path *path;
10027         struct btrfs_root *tree_root = fs_info->tree_root;
10028         struct btrfs_root *cur_root;
10029         struct extent_buffer *node;
10030         struct btrfs_key key;
10031         int slot = 0;
10032         int ret = 0;
10033
10034         path = btrfs_alloc_path();
10035         if (!path)
10036                 return -ENOMEM;
10037
10038         key.objectid = BTRFS_FS_TREE_OBJECTID;
10039         key.offset = 0;
10040         key.type = BTRFS_ROOT_ITEM_KEY;
10041
10042         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10043         if (ret < 0)
10044                 goto out;
10045         if (ret > 0) {
10046                 ret = -ENOENT;
10047                 goto out;
10048         }
10049
10050         while (1) {
10051                 node = path->nodes[0];
10052                 slot = path->slots[0];
10053                 btrfs_item_key_to_cpu(node, &key, slot);
10054                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10055                         goto out;
10056                 if (key.type != BTRFS_ROOT_ITEM_KEY)
10057                         goto next;
10058                 if (!is_fstree(key.objectid))
10059                         goto next;
10060                 key.offset = (u64)-1;
10061
10062                 cur_root = btrfs_read_fs_root(fs_info, &key);
10063                 if (IS_ERR(cur_root) || !cur_root) {
10064                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10065                                 key.objectid);
10066                         goto out;
10067                 }
10068                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10069                                 cur_root);
10070                 if (ret < 0)
10071                         goto out;
10072 next:
10073                 ret = btrfs_next_item(tree_root, path);
10074                 if (ret > 0) {
10075                         ret = 0;
10076                         goto out;
10077                 }
10078                 if (ret < 0)
10079                         goto out;
10080         }
10081
10082 out:
10083         btrfs_free_path(path);
10084         return ret;
10085 }
10086
10087 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10088                                       struct btrfs_root *csum_root)
10089 {
10090         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10091         struct btrfs_path *path;
10092         struct btrfs_extent_item *ei;
10093         struct extent_buffer *leaf;
10094         char *buf;
10095         struct btrfs_key key;
10096         int ret;
10097
10098         path = btrfs_alloc_path();
10099         if (!path)
10100                 return -ENOMEM;
10101
10102         key.objectid = 0;
10103         key.type = BTRFS_EXTENT_ITEM_KEY;
10104         key.offset = 0;
10105
10106         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10107         if (ret < 0) {
10108                 btrfs_free_path(path);
10109                 return ret;
10110         }
10111
10112         buf = malloc(csum_root->sectorsize);
10113         if (!buf) {
10114                 btrfs_free_path(path);
10115                 return -ENOMEM;
10116         }
10117
10118         while (1) {
10119                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10120                         ret = btrfs_next_leaf(extent_root, path);
10121                         if (ret < 0)
10122                                 break;
10123                         if (ret) {
10124                                 ret = 0;
10125                                 break;
10126                         }
10127                 }
10128                 leaf = path->nodes[0];
10129
10130                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10131                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10132                         path->slots[0]++;
10133                         continue;
10134                 }
10135
10136                 ei = btrfs_item_ptr(leaf, path->slots[0],
10137                                     struct btrfs_extent_item);
10138                 if (!(btrfs_extent_flags(leaf, ei) &
10139                       BTRFS_EXTENT_FLAG_DATA)) {
10140                         path->slots[0]++;
10141                         continue;
10142                 }
10143
10144                 ret = populate_csum(trans, csum_root, buf, key.objectid,
10145                                     key.offset);
10146                 if (ret)
10147                         break;
10148                 path->slots[0]++;
10149         }
10150
10151         btrfs_free_path(path);
10152         free(buf);
10153         return ret;
10154 }
10155
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes all extent information, so in that
 * case the extent tree cannot be used as the source.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
10172
10173 static void free_roots_info_cache(void)
10174 {
10175         if (!roots_info_cache)
10176                 return;
10177
10178         while (!cache_tree_empty(roots_info_cache)) {
10179                 struct cache_extent *entry;
10180                 struct root_item_info *rii;
10181
10182                 entry = first_cache_extent(roots_info_cache);
10183                 if (!entry)
10184                         break;
10185                 remove_cache_extent(roots_info_cache, entry);
10186                 rii = container_of(entry, struct root_item_info, cache_extent);
10187                 free(rii);
10188         }
10189
10190         free(roots_info_cache);
10191         roots_info_cache = NULL;
10192 }
10193
10194 static int build_roots_info_cache(struct btrfs_fs_info *info)
10195 {
10196         int ret = 0;
10197         struct btrfs_key key;
10198         struct extent_buffer *leaf;
10199         struct btrfs_path *path;
10200
10201         if (!roots_info_cache) {
10202                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10203                 if (!roots_info_cache)
10204                         return -ENOMEM;
10205                 cache_tree_init(roots_info_cache);
10206         }
10207
10208         path = btrfs_alloc_path();
10209         if (!path)
10210                 return -ENOMEM;
10211
10212         key.objectid = 0;
10213         key.type = BTRFS_EXTENT_ITEM_KEY;
10214         key.offset = 0;
10215
10216         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10217         if (ret < 0)
10218                 goto out;
10219         leaf = path->nodes[0];
10220
10221         while (1) {
10222                 struct btrfs_key found_key;
10223                 struct btrfs_extent_item *ei;
10224                 struct btrfs_extent_inline_ref *iref;
10225                 int slot = path->slots[0];
10226                 int type;
10227                 u64 flags;
10228                 u64 root_id;
10229                 u8 level;
10230                 struct cache_extent *entry;
10231                 struct root_item_info *rii;
10232
10233                 if (slot >= btrfs_header_nritems(leaf)) {
10234                         ret = btrfs_next_leaf(info->extent_root, path);
10235                         if (ret < 0) {
10236                                 break;
10237                         } else if (ret) {
10238                                 ret = 0;
10239                                 break;
10240                         }
10241                         leaf = path->nodes[0];
10242                         slot = path->slots[0];
10243                 }
10244
10245                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10246
10247                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10248                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10249                         goto next;
10250
10251                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10252                 flags = btrfs_extent_flags(leaf, ei);
10253
10254                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10255                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10256                         goto next;
10257
10258                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10259                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10260                         level = found_key.offset;
10261                 } else {
10262                         struct btrfs_tree_block_info *binfo;
10263
10264                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10265                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10266                         level = btrfs_tree_block_level(leaf, binfo);
10267                 }
10268
10269                 /*
10270                  * For a root extent, it must be of the following type and the
10271                  * first (and only one) iref in the item.
10272                  */
10273                 type = btrfs_extent_inline_ref_type(leaf, iref);
10274                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10275                         goto next;
10276
10277                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10278                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10279                 if (!entry) {
10280                         rii = malloc(sizeof(struct root_item_info));
10281                         if (!rii) {
10282                                 ret = -ENOMEM;
10283                                 goto out;
10284                         }
10285                         rii->cache_extent.start = root_id;
10286                         rii->cache_extent.size = 1;
10287                         rii->level = (u8)-1;
10288                         entry = &rii->cache_extent;
10289                         ret = insert_cache_extent(roots_info_cache, entry);
10290                         ASSERT(ret == 0);
10291                 } else {
10292                         rii = container_of(entry, struct root_item_info,
10293                                            cache_extent);
10294                 }
10295
10296                 ASSERT(rii->cache_extent.start == root_id);
10297                 ASSERT(rii->cache_extent.size == 1);
10298
10299                 if (level > rii->level || rii->level == (u8)-1) {
10300                         rii->level = level;
10301                         rii->bytenr = found_key.objectid;
10302                         rii->gen = btrfs_extent_generation(leaf, ei);
10303                         rii->node_count = 1;
10304                 } else if (level == rii->level) {
10305                         rii->node_count++;
10306                 }
10307 next:
10308                 path->slots[0]++;
10309         }
10310
10311 out:
10312         btrfs_free_path(path);
10313
10314         return ret;
10315 }
10316
10317 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10318                                   struct btrfs_path *path,
10319                                   const struct btrfs_key *root_key,
10320                                   const int read_only_mode)
10321 {
10322         const u64 root_id = root_key->objectid;
10323         struct cache_extent *entry;
10324         struct root_item_info *rii;
10325         struct btrfs_root_item ri;
10326         unsigned long offset;
10327
10328         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10329         if (!entry) {
10330                 fprintf(stderr,
10331                         "Error: could not find extent items for root %llu\n",
10332                         root_key->objectid);
10333                 return -ENOENT;
10334         }
10335
10336         rii = container_of(entry, struct root_item_info, cache_extent);
10337         ASSERT(rii->cache_extent.start == root_id);
10338         ASSERT(rii->cache_extent.size == 1);
10339
10340         if (rii->node_count != 1) {
10341                 fprintf(stderr,
10342                         "Error: could not find btree root extent for root %llu\n",
10343                         root_id);
10344                 return -ENOENT;
10345         }
10346
10347         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10348         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10349
10350         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10351             btrfs_root_level(&ri) != rii->level ||
10352             btrfs_root_generation(&ri) != rii->gen) {
10353
10354                 /*
10355                  * If we're in repair mode but our caller told us to not update
10356                  * the root item, i.e. just check if it needs to be updated, don't
10357                  * print this message, since the caller will call us again shortly
10358                  * for the same root item without read only mode (the caller will
10359                  * open a transaction first).
10360                  */
10361                 if (!(read_only_mode && repair))
10362                         fprintf(stderr,
10363                                 "%sroot item for root %llu,"
10364                                 " current bytenr %llu, current gen %llu, current level %u,"
10365                                 " new bytenr %llu, new gen %llu, new level %u\n",
10366                                 (read_only_mode ? "" : "fixing "),
10367                                 root_id,
10368                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10369                                 btrfs_root_level(&ri),
10370                                 rii->bytenr, rii->gen, rii->level);
10371
10372                 if (btrfs_root_generation(&ri) > rii->gen) {
10373                         fprintf(stderr,
10374                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10375                                 root_id, btrfs_root_generation(&ri), rii->gen);
10376                         return -EINVAL;
10377                 }
10378
10379                 if (!read_only_mode) {
10380                         btrfs_set_root_bytenr(&ri, rii->bytenr);
10381                         btrfs_set_root_level(&ri, rii->level);
10382                         btrfs_set_root_generation(&ri, rii->gen);
10383                         write_extent_buffer(path->nodes[0], &ri,
10384                                             offset, sizeof(ri));
10385                 }
10386
10387                 return 1;
10388         }
10389
10390         return 0;
10391 }
10392
10393 /*
10394  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
10395  * caused read-only snapshots to be corrupted if they were created at a moment
10396  * when the source subvolume/snapshot had orphan items. The issue was that the
10397  * on-disk root items became incorrect, referring to the pre orphan cleanup root
10398  * node instead of the post orphan cleanup root node.
10399  * So this function, and its callees, just detects and fixes those cases. Even
10400  * though the regression was for read-only snapshots, this function applies to
10401  * any snapshot/subvolume root.
10402  * This must be run before any other repair code - not doing it so, makes other
10403  * repair code delete or modify backrefs in the extent tree for example, which
10404  * will result in an inconsistent fs after repairing the root items.
10405  */
10406 static int repair_root_items(struct btrfs_fs_info *info)
10407 {
10408         struct btrfs_path *path = NULL;
10409         struct btrfs_key key;
10410         struct extent_buffer *leaf;
10411         struct btrfs_trans_handle *trans = NULL;
10412         int ret = 0;
10413         int bad_roots = 0;
10414         int need_trans = 0;
10415
10416         ret = build_roots_info_cache(info);
10417         if (ret)
10418                 goto out;
10419
10420         path = btrfs_alloc_path();
10421         if (!path) {
10422                 ret = -ENOMEM;
10423                 goto out;
10424         }
10425
10426         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
10427         key.type = BTRFS_ROOT_ITEM_KEY;
10428         key.offset = 0;
10429
10430 again:
10431         /*
10432          * Avoid opening and committing transactions if a leaf doesn't have
10433          * any root items that need to be fixed, so that we avoid rotating
10434          * backup roots unnecessarily.
10435          */
10436         if (need_trans) {
10437                 trans = btrfs_start_transaction(info->tree_root, 1);
10438                 if (IS_ERR(trans)) {
10439                         ret = PTR_ERR(trans);
10440                         goto out;
10441                 }
10442         }
10443
10444         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
10445                                 0, trans ? 1 : 0);
10446         if (ret < 0)
10447                 goto out;
10448         leaf = path->nodes[0];
10449
10450         while (1) {
10451                 struct btrfs_key found_key;
10452
10453                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
10454                         int no_more_keys = find_next_key(path, &key);
10455
10456                         btrfs_release_path(path);
10457                         if (trans) {
10458                                 ret = btrfs_commit_transaction(trans,
10459                                                                info->tree_root);
10460                                 trans = NULL;
10461                                 if (ret < 0)
10462                                         goto out;
10463                         }
10464                         need_trans = 0;
10465                         if (no_more_keys)
10466                                 break;
10467                         goto again;
10468                 }
10469
10470                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10471
10472                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
10473                         goto next;
10474                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
10475                         goto next;
10476
10477                 ret = maybe_repair_root_item(info, path, &found_key,
10478                                              trans ? 0 : 1);
10479                 if (ret < 0)
10480                         goto out;
10481                 if (ret) {
10482                         if (!trans && repair) {
10483                                 need_trans = 1;
10484                                 key = found_key;
10485                                 btrfs_release_path(path);
10486                                 goto again;
10487                         }
10488                         bad_roots++;
10489                 }
10490 next:
10491                 path->slots[0]++;
10492         }
10493         ret = 0;
10494 out:
10495         free_roots_info_cache();
10496         btrfs_free_path(path);
10497         if (trans)
10498                 btrfs_commit_transaction(trans, info->tree_root);
10499         if (ret < 0)
10500                 return ret;
10501
10502         return bad_roots;
10503 }
10504
/*
 * Help text for "btrfs check": synopsis, short description, long
 * description, an empty separator line, then one entry per option.
 * Option descriptions all start at the same column (offset 28).
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	NULL
};
10528
10529 int cmd_check(int argc, char **argv)
10530 {
10531         struct cache_tree root_cache;
10532         struct btrfs_root *root;
10533         struct btrfs_fs_info *info;
10534         u64 bytenr = 0;
10535         u64 subvolid = 0;
10536         u64 tree_root_bytenr = 0;
10537         u64 chunk_root_bytenr = 0;
10538         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10539         int ret;
10540         u64 num;
10541         int init_csum_tree = 0;
10542         int readonly = 0;
10543         int qgroup_report = 0;
10544         int qgroups_repaired = 0;
10545         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10546
10547         while(1) {
10548                 int c;
10549                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10550                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10551                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10552                 static const struct option long_options[] = {
10553                         { "super", required_argument, NULL, 's' },
10554                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10555                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10556                         { "init-csum-tree", no_argument, NULL,
10557                                 GETOPT_VAL_INIT_CSUM },
10558                         { "init-extent-tree", no_argument, NULL,
10559                                 GETOPT_VAL_INIT_EXTENT },
10560                         { "check-data-csum", no_argument, NULL,
10561                                 GETOPT_VAL_CHECK_CSUM },
10562                         { "backup", no_argument, NULL, 'b' },
10563                         { "subvol-extents", required_argument, NULL, 'E' },
10564                         { "qgroup-report", no_argument, NULL, 'Q' },
10565                         { "tree-root", required_argument, NULL, 'r' },
10566                         { "chunk-root", required_argument, NULL,
10567                                 GETOPT_VAL_CHUNK_TREE },
10568                         { "progress", no_argument, NULL, 'p' },
10569                         { NULL, 0, NULL, 0}
10570                 };
10571
10572                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10573                 if (c < 0)
10574                         break;
10575                 switch(c) {
10576                         case 'a': /* ignored */ break;
10577                         case 'b':
10578                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10579                                 break;
10580                         case 's':
10581                                 num = arg_strtou64(optarg);
10582                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10583                                         fprintf(stderr,
10584                                                 "ERROR: super mirror should be less than: %d\n",
10585                                                 BTRFS_SUPER_MIRROR_MAX);
10586                                         exit(1);
10587                                 }
10588                                 bytenr = btrfs_sb_offset(((int)num));
10589                                 printf("using SB copy %llu, bytenr %llu\n", num,
10590                                        (unsigned long long)bytenr);
10591                                 break;
10592                         case 'Q':
10593                                 qgroup_report = 1;
10594                                 break;
10595                         case 'E':
10596                                 subvolid = arg_strtou64(optarg);
10597                                 break;
10598                         case 'r':
10599                                 tree_root_bytenr = arg_strtou64(optarg);
10600                                 break;
10601                         case GETOPT_VAL_CHUNK_TREE:
10602                                 chunk_root_bytenr = arg_strtou64(optarg);
10603                                 break;
10604                         case 'p':
10605                                 ctx.progress_enabled = true;
10606                                 break;
10607                         case '?':
10608                         case 'h':
10609                                 usage(cmd_check_usage);
10610                         case GETOPT_VAL_REPAIR:
10611                                 printf("enabling repair mode\n");
10612                                 repair = 1;
10613                                 ctree_flags |= OPEN_CTREE_WRITES;
10614                                 break;
10615                         case GETOPT_VAL_READONLY:
10616                                 readonly = 1;
10617                                 break;
10618                         case GETOPT_VAL_INIT_CSUM:
10619                                 printf("Creating a new CRC tree\n");
10620                                 init_csum_tree = 1;
10621                                 repair = 1;
10622                                 ctree_flags |= OPEN_CTREE_WRITES;
10623                                 break;
10624                         case GETOPT_VAL_INIT_EXTENT:
10625                                 init_extent_tree = 1;
10626                                 ctree_flags |= (OPEN_CTREE_WRITES |
10627                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
10628                                 repair = 1;
10629                                 break;
10630                         case GETOPT_VAL_CHECK_CSUM:
10631                                 check_data_csum = 1;
10632                                 break;
10633                 }
10634         }
10635
10636         if (check_argc_exact(argc - optind, 1))
10637                 usage(cmd_check_usage);
10638
10639         if (ctx.progress_enabled) {
10640                 ctx.tp = TASK_NOTHING;
10641                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
10642         }
10643
10644         /* This check is the only reason for --readonly to exist */
10645         if (readonly && repair) {
10646                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
10647                 exit(1);
10648         }
10649
10650         radix_tree_init();
10651         cache_tree_init(&root_cache);
10652
10653         if((ret = check_mounted(argv[optind])) < 0) {
10654                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
10655                 goto err_out;
10656         } else if(ret) {
10657                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
10658                 ret = -EBUSY;
10659                 goto err_out;
10660         }
10661
10662         /* only allow partial opening under repair mode */
10663         if (repair)
10664                 ctree_flags |= OPEN_CTREE_PARTIAL;
10665
10666         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
10667                                   chunk_root_bytenr, ctree_flags);
10668         if (!info) {
10669                 fprintf(stderr, "Couldn't open file system\n");
10670                 ret = -EIO;
10671                 goto err_out;
10672         }
10673
10674         global_info = info;
10675         root = info->fs_root;
10676
10677         /*
10678          * repair mode will force us to commit transaction which
10679          * will make us fail to load log tree when mounting.
10680          */
10681         if (repair && btrfs_super_log_root(info->super_copy)) {
10682                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
10683                 if (!ret) {
10684                         ret = 1;
10685                         goto close_out;
10686                 }
10687                 ret = zero_log_tree(root);
10688                 if (ret) {
10689                         fprintf(stderr, "fail to zero log tree\n");
10690                         goto close_out;
10691                 }
10692         }
10693
10694         uuid_unparse(info->super_copy->fsid, uuidbuf);
10695         if (qgroup_report) {
10696                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10697                        uuidbuf);
10698                 ret = qgroup_verify_all(info);
10699                 if (ret == 0)
10700                         report_qgroups(1);
10701                 goto close_out;
10702         }
10703         if (subvolid) {
10704                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10705                        subvolid, argv[optind], uuidbuf);
10706                 ret = print_extent_state(info, subvolid);
10707                 goto close_out;
10708         }
10709         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10710
10711         if (!extent_buffer_uptodate(info->tree_root->node) ||
10712             !extent_buffer_uptodate(info->dev_root->node) ||
10713             !extent_buffer_uptodate(info->chunk_root->node)) {
10714                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10715                 ret = -EIO;
10716                 goto close_out;
10717         }
10718
10719         if (init_extent_tree || init_csum_tree) {
10720                 struct btrfs_trans_handle *trans;
10721
10722                 trans = btrfs_start_transaction(info->extent_root, 0);
10723                 if (IS_ERR(trans)) {
10724                         fprintf(stderr, "Error starting transaction\n");
10725                         ret = PTR_ERR(trans);
10726                         goto close_out;
10727                 }
10728
10729                 if (init_extent_tree) {
10730                         printf("Creating a new extent tree\n");
10731                         ret = reinit_extent_tree(trans, info);
10732                         if (ret)
10733                                 goto close_out;
10734                 }
10735
10736                 if (init_csum_tree) {
10737                         fprintf(stderr, "Reinit crc root\n");
10738                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10739                         if (ret) {
10740                                 fprintf(stderr, "crc root initialization failed\n");
10741                                 ret = -EIO;
10742                                 goto close_out;
10743                         }
10744
10745                         ret = fill_csum_tree(trans, info->csum_root,
10746                                              init_extent_tree);
10747                         if (ret) {
10748                                 fprintf(stderr, "crc refilling failed\n");
10749                                 return -EIO;
10750                         }
10751                 }
10752                 /*
10753                  * Ok now we commit and run the normal fsck, which will add
10754                  * extent entries for all of the items it finds.
10755                  */
10756                 ret = btrfs_commit_transaction(trans, info->extent_root);
10757                 if (ret)
10758                         goto close_out;
10759         }
10760         if (!extent_buffer_uptodate(info->extent_root->node)) {
10761                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10762                 ret = -EIO;
10763                 goto close_out;
10764         }
10765         if (!extent_buffer_uptodate(info->csum_root->node)) {
10766                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10767                 ret = -EIO;
10768                 goto close_out;
10769         }
10770
10771         if (!ctx.progress_enabled)
10772                 fprintf(stderr, "checking extents\n");
10773         ret = check_chunks_and_extents(root);
10774         if (ret)
10775                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10776
10777         ret = repair_root_items(info);
10778         if (ret < 0)
10779                 goto close_out;
10780         if (repair) {
10781                 fprintf(stderr, "Fixed %d roots.\n", ret);
10782                 ret = 0;
10783         } else if (ret > 0) {
10784                 fprintf(stderr,
10785                        "Found %d roots with an outdated root item.\n",
10786                        ret);
10787                 fprintf(stderr,
10788                         "Please run a filesystem check with the option --repair to fix them.\n");
10789                 ret = 1;
10790                 goto close_out;
10791         }
10792
10793         if (!ctx.progress_enabled) {
10794                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10795                         fprintf(stderr, "checking free space tree\n");
10796                 else
10797                         fprintf(stderr, "checking free space cache\n");
10798         }
10799         ret = check_space_cache(root);
10800         if (ret)
10801                 goto out;
10802
10803         /*
10804          * We used to have to have these hole extents in between our real
10805          * extents so if we don't have this flag set we need to make sure there
10806          * are no gaps in the file extents for inodes, otherwise we can just
10807          * ignore it when this happens.
10808          */
10809         no_holes = btrfs_fs_incompat(root->fs_info,
10810                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
10811         if (!ctx.progress_enabled)
10812                 fprintf(stderr, "checking fs roots\n");
10813         ret = check_fs_roots(root, &root_cache);
10814         if (ret)
10815                 goto out;
10816
10817         fprintf(stderr, "checking csums\n");
10818         ret = check_csums(root);
10819         if (ret)
10820                 goto out;
10821
10822         fprintf(stderr, "checking root refs\n");
10823         ret = check_root_refs(root, &root_cache);
10824         if (ret)
10825                 goto out;
10826
10827         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10828                 struct extent_buffer *eb;
10829
10830                 eb = list_first_entry(&root->fs_info->recow_ebs,
10831                                       struct extent_buffer, recow);
10832                 list_del_init(&eb->recow);
10833                 ret = recow_extent_buffer(root, eb);
10834                 if (ret)
10835                         break;
10836         }
10837
10838         while (!list_empty(&delete_items)) {
10839                 struct bad_item *bad;
10840
10841                 bad = list_first_entry(&delete_items, struct bad_item, list);
10842                 list_del_init(&bad->list);
10843                 if (repair)
10844                         ret = delete_bad_item(root, bad);
10845                 free(bad);
10846         }
10847
10848         if (info->quota_enabled) {
10849                 int err;
10850                 fprintf(stderr, "checking quota groups\n");
10851                 err = qgroup_verify_all(info);
10852                 if (err)
10853                         goto out;
10854                 report_qgroups(0);
10855                 err = repair_qgroups(info, &qgroups_repaired);
10856                 if (err)
10857                         goto out;
10858         }
10859
10860         if (!list_empty(&root->fs_info->recow_ebs)) {
10861                 fprintf(stderr, "Transid errors in file system\n");
10862                 ret = 1;
10863         }
10864 out:
10865         /* Don't override original ret */
10866         if (!ret && qgroups_repaired)
10867                 ret = qgroups_repaired;
10868
10869         if (found_old_backref) { /*
10870                  * there was a disk format change when mixed
10871                  * backref was in testing tree. The old format
10872                  * existed about one week.
10873                  */
10874                 printf("\n * Found old mixed backref format. "
10875                        "The old format is not supported! *"
10876                        "\n * Please mount the FS in readonly mode, "
10877                        "backup data and re-format the FS. *\n\n");
10878                 ret = 1;
10879         }
10880         printf("found %llu bytes used err is %d\n",
10881                (unsigned long long)bytes_used, ret);
10882         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10883         printf("total tree bytes: %llu\n",
10884                (unsigned long long)total_btree_bytes);
10885         printf("total fs tree bytes: %llu\n",
10886                (unsigned long long)total_fs_tree_bytes);
10887         printf("total extent tree bytes: %llu\n",
10888                (unsigned long long)total_extent_tree_bytes);
10889         printf("btree space waste bytes: %llu\n",
10890                (unsigned long long)btree_space_waste);
10891         printf("file data blocks allocated: %llu\n referenced %llu\n",
10892                 (unsigned long long)data_bytes_allocated,
10893                 (unsigned long long)data_bytes_referenced);
10894
10895         free_qgroup_counts();
10896         free_root_recs_tree(&root_cache);
10897 close_out:
10898         close_ctree(root);
10899 err_out:
10900         if (ctx.progress_enabled)
10901                 task_deinit(ctx.info);
10902
10903         return ret;
10904 }