btrfs-progs: check: introduce function to check shared data backref
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
/*
 * Phases of the check that the progress indicator can report.
 * The values are used as indexes into the phase-name table in
 * print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel: nothing to report.  Has to stay the last element. */
	TASK_NOTHING,
};
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
76
77 struct extent_backref {
78         struct rb_node node;
79         unsigned int is_data:1;
80         unsigned int found_extent_tree:1;
81         unsigned int full_backref:1;
82         unsigned int found_ref:1;
83         unsigned int broken:1;
84 };
85
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
87 {
88         return rb_entry(node, struct extent_backref, node);
89 }
90
91 struct data_backref {
92         struct extent_backref node;
93         union {
94                 u64 parent;
95                 u64 root;
96         };
97         u64 owner;
98         u64 offset;
99         u64 disk_bytenr;
100         u64 bytes;
101         u64 ram_bytes;
102         u32 num_refs;
103         u32 found_ref;
104 };
105
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
107 {
108         return container_of(back, struct data_backref, node);
109 }
110
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
112 {
113         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115         struct data_backref *back1 = to_data_backref(ext1);
116         struct data_backref *back2 = to_data_backref(ext2);
117
118         WARN_ON(!ext1->is_data);
119         WARN_ON(!ext2->is_data);
120
121         /* parent and root are a union, so this covers both */
122         if (back1->parent > back2->parent)
123                 return 1;
124         if (back1->parent < back2->parent)
125                 return -1;
126
127         /* This is a full backref and the parents match. */
128         if (back1->node.full_backref)
129                 return 0;
130
131         if (back1->owner > back2->owner)
132                 return 1;
133         if (back1->owner < back2->owner)
134                 return -1;
135
136         if (back1->offset > back2->offset)
137                 return 1;
138         if (back1->offset < back2->offset)
139                 return -1;
140
141         if (back1->bytes > back2->bytes)
142                 return 1;
143         if (back1->bytes < back2->bytes)
144                 return -1;
145
146         if (back1->found_ref && back2->found_ref) {
147                 if (back1->disk_bytenr > back2->disk_bytenr)
148                         return 1;
149                 if (back1->disk_bytenr < back2->disk_bytenr)
150                         return -1;
151
152                 if (back1->found_ref > back2->found_ref)
153                         return 1;
154                 if (back1->found_ref < back2->found_ref)
155                         return -1;
156         }
157
158         return 0;
159 }
160
161 /*
162  * Much like data_backref, just removed the undetermined members
163  * and change it to use list_head.
164  * During extent scan, it is stored in root->orphan_data_extent.
165  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
166  */
167 struct orphan_data_extent {
168         struct list_head list;
169         u64 root;
170         u64 objectid;
171         u64 offset;
172         u64 disk_bytenr;
173         u64 disk_len;
174 };
175
176 struct tree_backref {
177         struct extent_backref node;
178         union {
179                 u64 parent;
180                 u64 root;
181         };
182 };
183
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
185 {
186         return container_of(back, struct tree_backref, node);
187 }
188
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
190 {
191         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193         struct tree_backref *back1 = to_tree_backref(ext1);
194         struct tree_backref *back2 = to_tree_backref(ext2);
195
196         WARN_ON(ext1->is_data);
197         WARN_ON(ext2->is_data);
198
199         /* parent and root are a union, so this covers both */
200         if (back1->parent > back2->parent)
201                 return 1;
202         if (back1->parent < back2->parent)
203                 return -1;
204
205         return 0;
206 }
207
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
209 {
210         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
212
213         if (ext1->is_data > ext2->is_data)
214                 return 1;
215
216         if (ext1->is_data < ext2->is_data)
217                 return -1;
218
219         if (ext1->full_backref > ext2->full_backref)
220                 return 1;
221         if (ext1->full_backref < ext2->full_backref)
222                 return -1;
223
224         if (ext1->is_data)
225                 return compare_data_backref(node1, node2);
226         else
227                 return compare_tree_backref(node1, node2);
228 }
229
/*
 * Explicit initialization value for the two-bit field
 * extent_record::flag_block_full_backref.
 */
enum {
	FLAG_UNSET = 2,
};
232
233 struct extent_record {
234         struct list_head backrefs;
235         struct list_head dups;
236         struct rb_root backref_tree;
237         struct list_head list;
238         struct cache_extent cache;
239         struct btrfs_disk_key parent_key;
240         u64 start;
241         u64 max_size;
242         u64 nr;
243         u64 refs;
244         u64 extent_item_refs;
245         u64 generation;
246         u64 parent_generation;
247         u64 info_objectid;
248         u32 num_duplicates;
249         u8 info_level;
250         unsigned int flag_block_full_backref:2;
251         unsigned int found_rec:1;
252         unsigned int content_checked:1;
253         unsigned int owner_ref_checked:1;
254         unsigned int is_root:1;
255         unsigned int metadata:1;
256         unsigned int bad_full_backref:1;
257         unsigned int crossing_stripes:1;
258         unsigned int wrong_chunk_type:1;
259 };
260
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
262 {
263         return container_of(entry, struct extent_record, list);
264 }
265
266 struct inode_backref {
267         struct list_head list;
268         unsigned int found_dir_item:1;
269         unsigned int found_dir_index:1;
270         unsigned int found_inode_ref:1;
271         unsigned int filetype:8;
272         int errors;
273         unsigned int ref_type;
274         u64 dir;
275         u64 index;
276         u16 namelen;
277         char name[0];
278 };
279
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
281 {
282         return list_entry(entry, struct inode_backref, list);
283 }
284
285 struct root_item_record {
286         struct list_head list;
287         u64 objectid;
288         u64 bytenr;
289         u64 last_snapshot;
290         u8 level;
291         u8 drop_level;
292         int level_size;
293         struct btrfs_key drop_key;
294 };
295
/* Error bits for a reference; print_ref_error() turns them into text. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
309
310 struct file_extent_hole {
311         struct rb_node node;
312         u64 start;
313         u64 len;
314 };
315
316 struct inode_record {
317         struct list_head backrefs;
318         unsigned int checked:1;
319         unsigned int merging:1;
320         unsigned int found_inode_item:1;
321         unsigned int found_dir_item:1;
322         unsigned int found_file_extent:1;
323         unsigned int found_csum_item:1;
324         unsigned int some_csum_missing:1;
325         unsigned int nodatasum:1;
326         int errors;
327
328         u64 ino;
329         u32 nlink;
330         u32 imode;
331         u64 isize;
332         u64 nbytes;
333
334         u32 found_link;
335         u64 found_size;
336         u64 extent_start;
337         u64 extent_end;
338         struct rb_root holes;
339         struct list_head orphan_extents;
340
341         u32 refs;
342 };
343
/* Bits of inode_record::errors; print_inode_error() turns them into text. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
359
360 struct root_backref {
361         struct list_head list;
362         unsigned int found_dir_item:1;
363         unsigned int found_dir_index:1;
364         unsigned int found_back_ref:1;
365         unsigned int found_forward_ref:1;
366         unsigned int reachable:1;
367         int errors;
368         u64 ref_root;
369         u64 dir;
370         u64 index;
371         u16 namelen;
372         char name[0];
373 };
374
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
376 {
377         return list_entry(entry, struct root_backref, list);
378 }
379
380 struct root_record {
381         struct list_head backrefs;
382         struct cache_extent cache;
383         unsigned int found_root_item:1;
384         u64 objectid;
385         u32 found_ref;
386 };
387
388 struct ptr_node {
389         struct cache_extent cache;
390         void *data;
391 };
392
393 struct shared_node {
394         struct cache_extent cache;
395         struct cache_tree root_cache;
396         struct cache_tree inode_cache;
397         struct inode_record *current;
398         u32 refs;
399 };
400
401 struct block_info {
402         u64 start;
403         u32 size;
404 };
405
406 struct walk_control {
407         struct cache_tree shared;
408         struct shared_node *nodes[BTRFS_MAX_LEVEL];
409         int active_node;
410         int root_level;
411 };
412
413 struct bad_item {
414         struct btrfs_key key;
415         u64 root_id;
416         struct list_head list;
417 };
418
419 struct extent_entry {
420         u64 bytenr;
421         u64 bytes;
422         int count;
423         int broken;
424         struct list_head list;
425 };
426
427 struct root_item_info {
428         /* level of the root */
429         u8 level;
430         /* number of nodes at this level, must be 1 for a root */
431         int node_count;
432         u64 bytenr;
433         u64 gen;
434         struct cache_extent cache_extent;
435 };
436
/*
 * Error bits for the low memory mode check.
 *
 * No caller looks at them yet; they are only used internally to classify
 * errors.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
448
449 static void *print_status_check(void *p)
450 {
451         struct task_ctx *priv = p;
452         const char work_indicator[] = { '.', 'o', 'O', 'o' };
453         uint32_t count = 0;
454         static char *task_position_string[] = {
455                 "checking extents",
456                 "checking free space cache",
457                 "checking fs roots",
458         };
459
460         task_period_start(priv->info, 1000 /* 1s */);
461
462         if (priv->tp == TASK_NOTHING)
463                 return NULL;
464
465         while (1) {
466                 printf("%s [%c]\r", task_position_string[priv->tp],
467                                 work_indicator[count % 4]);
468                 count++;
469                 fflush(stdout);
470                 task_period_wait(priv->info);
471         }
472         return NULL;
473 }
474
/*
 * Finish the progress output: emit a newline and flush stdout.
 *
 * @p: not used.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	printf("\n");
	fflush(stdout);
	return 0;
}
482
483 /* Compatible function to allow reuse of old codes */
484 static u64 first_extent_gap(struct rb_root *holes)
485 {
486         struct file_extent_hole *hole;
487
488         if (RB_EMPTY_ROOT(holes))
489                 return (u64)-1;
490
491         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
492         return hole->start;
493 }
494
495 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
496 {
497         struct file_extent_hole *hole1;
498         struct file_extent_hole *hole2;
499
500         hole1 = rb_entry(node1, struct file_extent_hole, node);
501         hole2 = rb_entry(node2, struct file_extent_hole, node);
502
503         if (hole1->start > hole2->start)
504                 return -1;
505         if (hole1->start < hole2->start)
506                 return 1;
507         /* Now hole1->start == hole2->start */
508         if (hole1->len >= hole2->len)
509                 /*
510                  * Hole 1 will be merge center
511                  * Same hole will be merged later
512                  */
513                 return -1;
514         /* Hole 2 will be merge center */
515         return 1;
516 }
517
518 /*
519  * Add a hole to the record
520  *
521  * This will do hole merge for copy_file_extent_holes(),
522  * which will ensure there won't be continuous holes.
523  */
524 static int add_file_extent_hole(struct rb_root *holes,
525                                 u64 start, u64 len)
526 {
527         struct file_extent_hole *hole;
528         struct file_extent_hole *prev = NULL;
529         struct file_extent_hole *next = NULL;
530
531         hole = malloc(sizeof(*hole));
532         if (!hole)
533                 return -ENOMEM;
534         hole->start = start;
535         hole->len = len;
536         /* Since compare will not return 0, no -EEXIST will happen */
537         rb_insert(holes, &hole->node, compare_hole);
538
539         /* simple merge with previous hole */
540         if (rb_prev(&hole->node))
541                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
542                                 node);
543         if (prev && prev->start + prev->len >= hole->start) {
544                 hole->len = hole->start + hole->len - prev->start;
545                 hole->start = prev->start;
546                 rb_erase(&prev->node, holes);
547                 free(prev);
548                 prev = NULL;
549         }
550
551         /* iterate merge with next holes */
552         while (1) {
553                 if (!rb_next(&hole->node))
554                         break;
555                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
556                                         node);
557                 if (hole->start + hole->len >= next->start) {
558                         if (hole->start + hole->len <= next->start + next->len)
559                                 hole->len = next->start + next->len -
560                                             hole->start;
561                         rb_erase(&next->node, holes);
562                         free(next);
563                         next = NULL;
564                 } else
565                         break;
566         }
567         return 0;
568 }
569
570 static int compare_hole_range(struct rb_node *node, void *data)
571 {
572         struct file_extent_hole *hole;
573         u64 start;
574
575         hole = (struct file_extent_hole *)data;
576         start = hole->start;
577
578         hole = rb_entry(node, struct file_extent_hole, node);
579         if (start < hole->start)
580                 return -1;
581         if (start >= hole->start && start < hole->start + hole->len)
582                 return 0;
583         return 1;
584 }
585
586 /*
587  * Delete a hole in the record
588  *
589  * This will do the hole split and is much restrict than add.
590  */
591 static int del_file_extent_hole(struct rb_root *holes,
592                                 u64 start, u64 len)
593 {
594         struct file_extent_hole *hole;
595         struct file_extent_hole tmp;
596         u64 prev_start = 0;
597         u64 prev_len = 0;
598         u64 next_start = 0;
599         u64 next_len = 0;
600         struct rb_node *node;
601         int have_prev = 0;
602         int have_next = 0;
603         int ret = 0;
604
605         tmp.start = start;
606         tmp.len = len;
607         node = rb_search(holes, &tmp, compare_hole_range, NULL);
608         if (!node)
609                 return -EEXIST;
610         hole = rb_entry(node, struct file_extent_hole, node);
611         if (start + len > hole->start + hole->len)
612                 return -EEXIST;
613
614         /*
615          * Now there will be no overlap, delete the hole and re-add the
616          * split(s) if they exists.
617          */
618         if (start > hole->start) {
619                 prev_start = hole->start;
620                 prev_len = start - hole->start;
621                 have_prev = 1;
622         }
623         if (hole->start + hole->len > start + len) {
624                 next_start = start + len;
625                 next_len = hole->start + hole->len - start - len;
626                 have_next = 1;
627         }
628         rb_erase(node, holes);
629         free(hole);
630         if (have_prev) {
631                 ret = add_file_extent_hole(holes, prev_start, prev_len);
632                 if (ret < 0)
633                         return ret;
634         }
635         if (have_next) {
636                 ret = add_file_extent_hole(holes, next_start, next_len);
637                 if (ret < 0)
638                         return ret;
639         }
640         return 0;
641 }
642
643 static int copy_file_extent_holes(struct rb_root *dst,
644                                   struct rb_root *src)
645 {
646         struct file_extent_hole *hole;
647         struct rb_node *node;
648         int ret = 0;
649
650         node = rb_first(src);
651         while (node) {
652                 hole = rb_entry(node, struct file_extent_hole, node);
653                 ret = add_file_extent_hole(dst, hole->start, hole->len);
654                 if (ret)
655                         break;
656                 node = rb_next(node);
657         }
658         return ret;
659 }
660
661 static void free_file_extent_holes(struct rb_root *holes)
662 {
663         struct rb_node *node;
664         struct file_extent_hole *hole;
665
666         node = rb_first(holes);
667         while (node) {
668                 hole = rb_entry(node, struct file_extent_hole, node);
669                 rb_erase(node, holes);
670                 free(hole);
671                 node = rb_first(holes);
672         }
673 }
674
675 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
676
677 static void record_root_in_trans(struct btrfs_trans_handle *trans,
678                                  struct btrfs_root *root)
679 {
680         if (root->last_trans != trans->transid) {
681                 root->track_dirty = 1;
682                 root->last_trans = trans->transid;
683                 root->commit_root = root->node;
684                 extent_buffer_get(root->node);
685         }
686 }
687
688 static u8 imode_to_type(u32 imode)
689 {
690 #define S_SHIFT 12
691         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
692                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
693                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
694                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
695                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
696                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
697                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
698                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
699         };
700
701         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
702 #undef S_SHIFT
703 }
704
705 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
706 {
707         struct device_record *rec1;
708         struct device_record *rec2;
709
710         rec1 = rb_entry(node1, struct device_record, node);
711         rec2 = rb_entry(node2, struct device_record, node);
712         if (rec1->devid > rec2->devid)
713                 return -1;
714         else if (rec1->devid < rec2->devid)
715                 return 1;
716         else
717                 return 0;
718 }
719
720 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
721 {
722         struct inode_record *rec;
723         struct inode_backref *backref;
724         struct inode_backref *orig;
725         struct inode_backref *tmp;
726         struct orphan_data_extent *src_orphan;
727         struct orphan_data_extent *dst_orphan;
728         size_t size;
729         int ret;
730
731         rec = malloc(sizeof(*rec));
732         if (!rec)
733                 return ERR_PTR(-ENOMEM);
734         memcpy(rec, orig_rec, sizeof(*rec));
735         rec->refs = 1;
736         INIT_LIST_HEAD(&rec->backrefs);
737         INIT_LIST_HEAD(&rec->orphan_extents);
738         rec->holes = RB_ROOT;
739
740         list_for_each_entry(orig, &orig_rec->backrefs, list) {
741                 size = sizeof(*orig) + orig->namelen + 1;
742                 backref = malloc(size);
743                 if (!backref) {
744                         ret = -ENOMEM;
745                         goto cleanup;
746                 }
747                 memcpy(backref, orig, size);
748                 list_add_tail(&backref->list, &rec->backrefs);
749         }
750         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
751                 dst_orphan = malloc(sizeof(*dst_orphan));
752                 if (!dst_orphan) {
753                         ret = -ENOMEM;
754                         goto cleanup;
755                 }
756                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
757                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
758         }
759         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
760         BUG_ON(ret < 0);
761
762         return rec;
763
764 cleanup:
765         if (!list_empty(&rec->backrefs))
766                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
767                         list_del(&orig->list);
768                         free(orig);
769                 }
770
771         if (!list_empty(&rec->orphan_extents))
772                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
773                         list_del(&orig->list);
774                         free(orig);
775                 }
776
777         free(rec);
778
779         return ERR_PTR(ret);
780 }
781
782 static void print_orphan_data_extents(struct list_head *orphan_extents,
783                                       u64 objectid)
784 {
785         struct orphan_data_extent *orphan;
786
787         if (list_empty(orphan_extents))
788                 return;
789         printf("The following data extent is lost in tree %llu:\n",
790                objectid);
791         list_for_each_entry(orphan, orphan_extents, list) {
792                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
793                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
794                        orphan->disk_len);
795         }
796 }
797
798 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
799 {
800         u64 root_objectid = root->root_key.objectid;
801         int errors = rec->errors;
802
803         if (!errors)
804                 return;
805         /* reloc root errors, we print its corresponding fs root objectid*/
806         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
807                 root_objectid = root->root_key.offset;
808                 fprintf(stderr, "reloc");
809         }
810         fprintf(stderr, "root %llu inode %llu errors %x",
811                 (unsigned long long) root_objectid,
812                 (unsigned long long) rec->ino, rec->errors);
813
814         if (errors & I_ERR_NO_INODE_ITEM)
815                 fprintf(stderr, ", no inode item");
816         if (errors & I_ERR_NO_ORPHAN_ITEM)
817                 fprintf(stderr, ", no orphan item");
818         if (errors & I_ERR_DUP_INODE_ITEM)
819                 fprintf(stderr, ", dup inode item");
820         if (errors & I_ERR_DUP_DIR_INDEX)
821                 fprintf(stderr, ", dup dir index");
822         if (errors & I_ERR_ODD_DIR_ITEM)
823                 fprintf(stderr, ", odd dir item");
824         if (errors & I_ERR_ODD_FILE_EXTENT)
825                 fprintf(stderr, ", odd file extent");
826         if (errors & I_ERR_BAD_FILE_EXTENT)
827                 fprintf(stderr, ", bad file extent");
828         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
829                 fprintf(stderr, ", file extent overlap");
830         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
831                 fprintf(stderr, ", file extent discount");
832         if (errors & I_ERR_DIR_ISIZE_WRONG)
833                 fprintf(stderr, ", dir isize wrong");
834         if (errors & I_ERR_FILE_NBYTES_WRONG)
835                 fprintf(stderr, ", nbytes wrong");
836         if (errors & I_ERR_ODD_CSUM_ITEM)
837                 fprintf(stderr, ", odd csum item");
838         if (errors & I_ERR_SOME_CSUM_MISSING)
839                 fprintf(stderr, ", some csum missing");
840         if (errors & I_ERR_LINK_COUNT_WRONG)
841                 fprintf(stderr, ", link count wrong");
842         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
843                 fprintf(stderr, ", orphan file extent");
844         fprintf(stderr, "\n");
845         /* Print the orphan extents if needed */
846         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
847                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
848
849         /* Print the holes if needed */
850         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
851                 struct file_extent_hole *hole;
852                 struct rb_node *node;
853                 int found = 0;
854
855                 node = rb_first(&rec->holes);
856                 fprintf(stderr, "Found file extent holes:\n");
857                 while (node) {
858                         found = 1;
859                         hole = rb_entry(node, struct file_extent_hole, node);
860                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
861                                 hole->start, hole->len);
862                         node = rb_next(node);
863                 }
864                 if (!found)
865                         fprintf(stderr, "\tstart: 0, len: %llu\n",
866                                 round_up(rec->isize, root->sectorsize));
867         }
868 }
869
870 static void print_ref_error(int errors)
871 {
872         if (errors & REF_ERR_NO_DIR_ITEM)
873                 fprintf(stderr, ", no dir item");
874         if (errors & REF_ERR_NO_DIR_INDEX)
875                 fprintf(stderr, ", no dir index");
876         if (errors & REF_ERR_NO_INODE_REF)
877                 fprintf(stderr, ", no inode ref");
878         if (errors & REF_ERR_DUP_DIR_ITEM)
879                 fprintf(stderr, ", dup dir item");
880         if (errors & REF_ERR_DUP_DIR_INDEX)
881                 fprintf(stderr, ", dup dir index");
882         if (errors & REF_ERR_DUP_INODE_REF)
883                 fprintf(stderr, ", dup inode ref");
884         if (errors & REF_ERR_INDEX_UNMATCH)
885                 fprintf(stderr, ", index mismatch");
886         if (errors & REF_ERR_FILETYPE_UNMATCH)
887                 fprintf(stderr, ", filetype mismatch");
888         if (errors & REF_ERR_NAME_TOO_LONG)
889                 fprintf(stderr, ", name too long");
890         if (errors & REF_ERR_NO_ROOT_REF)
891                 fprintf(stderr, ", no root ref");
892         if (errors & REF_ERR_NO_ROOT_BACKREF)
893                 fprintf(stderr, ", no root backref");
894         if (errors & REF_ERR_DUP_ROOT_REF)
895                 fprintf(stderr, ", dup root ref");
896         if (errors & REF_ERR_DUP_ROOT_BACKREF)
897                 fprintf(stderr, ", dup root backref");
898         fprintf(stderr, "\n");
899 }
900
901 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
902                                           u64 ino, int mod)
903 {
904         struct ptr_node *node;
905         struct cache_extent *cache;
906         struct inode_record *rec = NULL;
907         int ret;
908
909         cache = lookup_cache_extent(inode_cache, ino, 1);
910         if (cache) {
911                 node = container_of(cache, struct ptr_node, cache);
912                 rec = node->data;
913                 if (mod && rec->refs > 1) {
914                         node->data = clone_inode_rec(rec);
915                         if (IS_ERR(node->data))
916                                 return node->data;
917                         rec->refs--;
918                         rec = node->data;
919                 }
920         } else if (mod) {
921                 rec = calloc(1, sizeof(*rec));
922                 if (!rec)
923                         return ERR_PTR(-ENOMEM);
924                 rec->ino = ino;
925                 rec->extent_start = (u64)-1;
926                 rec->refs = 1;
927                 INIT_LIST_HEAD(&rec->backrefs);
928                 INIT_LIST_HEAD(&rec->orphan_extents);
929                 rec->holes = RB_ROOT;
930
931                 node = malloc(sizeof(*node));
932                 if (!node) {
933                         free(rec);
934                         return ERR_PTR(-ENOMEM);
935                 }
936                 node->cache.start = ino;
937                 node->cache.size = 1;
938                 node->data = rec;
939
940                 if (ino == BTRFS_FREE_INO_OBJECTID)
941                         rec->found_link = 1;
942
943                 ret = insert_cache_extent(inode_cache, &node->cache);
944                 if (ret)
945                         return ERR_PTR(-EEXIST);
946         }
947         return rec;
948 }
949
950 static void free_orphan_data_extents(struct list_head *orphan_extents)
951 {
952         struct orphan_data_extent *orphan;
953
954         while (!list_empty(orphan_extents)) {
955                 orphan = list_entry(orphan_extents->next,
956                                     struct orphan_data_extent, list);
957                 list_del(&orphan->list);
958                 free(orphan);
959         }
960 }
961
962 static void free_inode_rec(struct inode_record *rec)
963 {
964         struct inode_backref *backref;
965
966         if (--rec->refs > 0)
967                 return;
968
969         while (!list_empty(&rec->backrefs)) {
970                 backref = to_inode_backref(rec->backrefs.next);
971                 list_del(&backref->list);
972                 free(backref);
973         }
974         free_orphan_data_extents(&rec->orphan_extents);
975         free_file_extent_holes(&rec->holes);
976         free(rec);
977 }
978
979 static int can_free_inode_rec(struct inode_record *rec)
980 {
981         if (!rec->errors && rec->checked && rec->found_inode_item &&
982             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
983                 return 1;
984         return 0;
985 }
986
987 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
988                                  struct inode_record *rec)
989 {
990         struct cache_extent *cache;
991         struct inode_backref *tmp, *backref;
992         struct ptr_node *node;
993         unsigned char filetype;
994
995         if (!rec->found_inode_item)
996                 return;
997
998         filetype = imode_to_type(rec->imode);
999         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1000                 if (backref->found_dir_item && backref->found_dir_index) {
1001                         if (backref->filetype != filetype)
1002                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1003                         if (!backref->errors && backref->found_inode_ref &&
1004                             rec->nlink == rec->found_link) {
1005                                 list_del(&backref->list);
1006                                 free(backref);
1007                         }
1008                 }
1009         }
1010
1011         if (!rec->checked || rec->merging)
1012                 return;
1013
1014         if (S_ISDIR(rec->imode)) {
1015                 if (rec->found_size != rec->isize)
1016                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1017                 if (rec->found_file_extent)
1018                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1019         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1020                 if (rec->found_dir_item)
1021                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1022                 if (rec->found_size != rec->nbytes)
1023                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1024                 if (rec->nlink > 0 && !no_holes &&
1025                     (rec->extent_end < rec->isize ||
1026                      first_extent_gap(&rec->holes) < rec->isize))
1027                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1028         }
1029
1030         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1031                 if (rec->found_csum_item && rec->nodatasum)
1032                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1033                 if (rec->some_csum_missing && !rec->nodatasum)
1034                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1035         }
1036
1037         BUG_ON(rec->refs != 1);
1038         if (can_free_inode_rec(rec)) {
1039                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1040                 node = container_of(cache, struct ptr_node, cache);
1041                 BUG_ON(node->data != rec);
1042                 remove_cache_extent(inode_cache, &node->cache);
1043                 free(node);
1044                 free_inode_rec(rec);
1045         }
1046 }
1047
1048 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1049 {
1050         struct btrfs_path path;
1051         struct btrfs_key key;
1052         int ret;
1053
1054         key.objectid = BTRFS_ORPHAN_OBJECTID;
1055         key.type = BTRFS_ORPHAN_ITEM_KEY;
1056         key.offset = ino;
1057
1058         btrfs_init_path(&path);
1059         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1060         btrfs_release_path(&path);
1061         if (ret > 0)
1062                 ret = -ENOENT;
1063         return ret;
1064 }
1065
1066 static int process_inode_item(struct extent_buffer *eb,
1067                               int slot, struct btrfs_key *key,
1068                               struct shared_node *active_node)
1069 {
1070         struct inode_record *rec;
1071         struct btrfs_inode_item *item;
1072
1073         rec = active_node->current;
1074         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1075         if (rec->found_inode_item) {
1076                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1077                 return 1;
1078         }
1079         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1080         rec->nlink = btrfs_inode_nlink(eb, item);
1081         rec->isize = btrfs_inode_size(eb, item);
1082         rec->nbytes = btrfs_inode_nbytes(eb, item);
1083         rec->imode = btrfs_inode_mode(eb, item);
1084         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1085                 rec->nodatasum = 1;
1086         rec->found_inode_item = 1;
1087         if (rec->nlink == 0)
1088                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1089         maybe_free_inode_rec(&active_node->inode_cache, rec);
1090         return 0;
1091 }
1092
1093 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1094                                                 const char *name,
1095                                                 int namelen, u64 dir)
1096 {
1097         struct inode_backref *backref;
1098
1099         list_for_each_entry(backref, &rec->backrefs, list) {
1100                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1101                         break;
1102                 if (backref->dir != dir || backref->namelen != namelen)
1103                         continue;
1104                 if (memcmp(name, backref->name, namelen))
1105                         continue;
1106                 return backref;
1107         }
1108
1109         backref = malloc(sizeof(*backref) + namelen + 1);
1110         if (!backref)
1111                 return NULL;
1112         memset(backref, 0, sizeof(*backref));
1113         backref->dir = dir;
1114         backref->namelen = namelen;
1115         memcpy(backref->name, name, namelen);
1116         backref->name[namelen] = '\0';
1117         list_add_tail(&backref->list, &rec->backrefs);
1118         return backref;
1119 }
1120
1121 static int add_inode_backref(struct cache_tree *inode_cache,
1122                              u64 ino, u64 dir, u64 index,
1123                              const char *name, int namelen,
1124                              int filetype, int itemtype, int errors)
1125 {
1126         struct inode_record *rec;
1127         struct inode_backref *backref;
1128
1129         rec = get_inode_rec(inode_cache, ino, 1);
1130         BUG_ON(IS_ERR(rec));
1131         backref = get_inode_backref(rec, name, namelen, dir);
1132         BUG_ON(!backref);
1133         if (errors)
1134                 backref->errors |= errors;
1135         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1136                 if (backref->found_dir_index)
1137                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1138                 if (backref->found_inode_ref && backref->index != index)
1139                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1140                 if (backref->found_dir_item && backref->filetype != filetype)
1141                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1142
1143                 backref->index = index;
1144                 backref->filetype = filetype;
1145                 backref->found_dir_index = 1;
1146         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1147                 rec->found_link++;
1148                 if (backref->found_dir_item)
1149                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1150                 if (backref->found_dir_index && backref->filetype != filetype)
1151                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1152
1153                 backref->filetype = filetype;
1154                 backref->found_dir_item = 1;
1155         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1156                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1157                 if (backref->found_inode_ref)
1158                         backref->errors |= REF_ERR_DUP_INODE_REF;
1159                 if (backref->found_dir_index && backref->index != index)
1160                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1161                 else
1162                         backref->index = index;
1163
1164                 backref->ref_type = itemtype;
1165                 backref->found_inode_ref = 1;
1166         } else {
1167                 BUG_ON(1);
1168         }
1169
1170         maybe_free_inode_rec(inode_cache, rec);
1171         return 0;
1172 }
1173
1174 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1175                             struct cache_tree *dst_cache)
1176 {
1177         struct inode_backref *backref;
1178         u32 dir_count = 0;
1179         int ret = 0;
1180
1181         dst->merging = 1;
1182         list_for_each_entry(backref, &src->backrefs, list) {
1183                 if (backref->found_dir_index) {
1184                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1185                                         backref->index, backref->name,
1186                                         backref->namelen, backref->filetype,
1187                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1188                 }
1189                 if (backref->found_dir_item) {
1190                         dir_count++;
1191                         add_inode_backref(dst_cache, dst->ino,
1192                                         backref->dir, 0, backref->name,
1193                                         backref->namelen, backref->filetype,
1194                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1195                 }
1196                 if (backref->found_inode_ref) {
1197                         add_inode_backref(dst_cache, dst->ino,
1198                                         backref->dir, backref->index,
1199                                         backref->name, backref->namelen, 0,
1200                                         backref->ref_type, backref->errors);
1201                 }
1202         }
1203
1204         if (src->found_dir_item)
1205                 dst->found_dir_item = 1;
1206         if (src->found_file_extent)
1207                 dst->found_file_extent = 1;
1208         if (src->found_csum_item)
1209                 dst->found_csum_item = 1;
1210         if (src->some_csum_missing)
1211                 dst->some_csum_missing = 1;
1212         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1213                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1214                 if (ret < 0)
1215                         return ret;
1216         }
1217
1218         BUG_ON(src->found_link < dir_count);
1219         dst->found_link += src->found_link - dir_count;
1220         dst->found_size += src->found_size;
1221         if (src->extent_start != (u64)-1) {
1222                 if (dst->extent_start == (u64)-1) {
1223                         dst->extent_start = src->extent_start;
1224                         dst->extent_end = src->extent_end;
1225                 } else {
1226                         if (dst->extent_end > src->extent_start)
1227                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1228                         else if (dst->extent_end < src->extent_start) {
1229                                 ret = add_file_extent_hole(&dst->holes,
1230                                         dst->extent_end,
1231                                         src->extent_start - dst->extent_end);
1232                         }
1233                         if (dst->extent_end < src->extent_end)
1234                                 dst->extent_end = src->extent_end;
1235                 }
1236         }
1237
1238         dst->errors |= src->errors;
1239         if (src->found_inode_item) {
1240                 if (!dst->found_inode_item) {
1241                         dst->nlink = src->nlink;
1242                         dst->isize = src->isize;
1243                         dst->nbytes = src->nbytes;
1244                         dst->imode = src->imode;
1245                         dst->nodatasum = src->nodatasum;
1246                         dst->found_inode_item = 1;
1247                 } else {
1248                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1249                 }
1250         }
1251         dst->merging = 0;
1252
1253         return 0;
1254 }
1255
1256 static int splice_shared_node(struct shared_node *src_node,
1257                               struct shared_node *dst_node)
1258 {
1259         struct cache_extent *cache;
1260         struct ptr_node *node, *ins;
1261         struct cache_tree *src, *dst;
1262         struct inode_record *rec, *conflict;
1263         u64 current_ino = 0;
1264         int splice = 0;
1265         int ret;
1266
1267         if (--src_node->refs == 0)
1268                 splice = 1;
1269         if (src_node->current)
1270                 current_ino = src_node->current->ino;
1271
1272         src = &src_node->root_cache;
1273         dst = &dst_node->root_cache;
1274 again:
1275         cache = search_cache_extent(src, 0);
1276         while (cache) {
1277                 node = container_of(cache, struct ptr_node, cache);
1278                 rec = node->data;
1279                 cache = next_cache_extent(cache);
1280
1281                 if (splice) {
1282                         remove_cache_extent(src, &node->cache);
1283                         ins = node;
1284                 } else {
1285                         ins = malloc(sizeof(*ins));
1286                         BUG_ON(!ins);
1287                         ins->cache.start = node->cache.start;
1288                         ins->cache.size = node->cache.size;
1289                         ins->data = rec;
1290                         rec->refs++;
1291                 }
1292                 ret = insert_cache_extent(dst, &ins->cache);
1293                 if (ret == -EEXIST) {
1294                         conflict = get_inode_rec(dst, rec->ino, 1);
1295                         BUG_ON(IS_ERR(conflict));
1296                         merge_inode_recs(rec, conflict, dst);
1297                         if (rec->checked) {
1298                                 conflict->checked = 1;
1299                                 if (dst_node->current == conflict)
1300                                         dst_node->current = NULL;
1301                         }
1302                         maybe_free_inode_rec(dst, conflict);
1303                         free_inode_rec(rec);
1304                         free(ins);
1305                 } else {
1306                         BUG_ON(ret);
1307                 }
1308         }
1309
1310         if (src == &src_node->root_cache) {
1311                 src = &src_node->inode_cache;
1312                 dst = &dst_node->inode_cache;
1313                 goto again;
1314         }
1315
1316         if (current_ino > 0 && (!dst_node->current ||
1317             current_ino > dst_node->current->ino)) {
1318                 if (dst_node->current) {
1319                         dst_node->current->checked = 1;
1320                         maybe_free_inode_rec(dst, dst_node->current);
1321                 }
1322                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1323                 BUG_ON(IS_ERR(dst_node->current));
1324         }
1325         return 0;
1326 }
1327
1328 static void free_inode_ptr(struct cache_extent *cache)
1329 {
1330         struct ptr_node *node;
1331         struct inode_record *rec;
1332
1333         node = container_of(cache, struct ptr_node, cache);
1334         rec = node->data;
1335         free_inode_rec(rec);
1336         free(node);
1337 }
1338
1339 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1340
1341 static struct shared_node *find_shared_node(struct cache_tree *shared,
1342                                             u64 bytenr)
1343 {
1344         struct cache_extent *cache;
1345         struct shared_node *node;
1346
1347         cache = lookup_cache_extent(shared, bytenr, 1);
1348         if (cache) {
1349                 node = container_of(cache, struct shared_node, cache);
1350                 return node;
1351         }
1352         return NULL;
1353 }
1354
1355 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1356 {
1357         int ret;
1358         struct shared_node *node;
1359
1360         node = calloc(1, sizeof(*node));
1361         if (!node)
1362                 return -ENOMEM;
1363         node->cache.start = bytenr;
1364         node->cache.size = 1;
1365         cache_tree_init(&node->root_cache);
1366         cache_tree_init(&node->inode_cache);
1367         node->refs = refs;
1368
1369         ret = insert_cache_extent(shared, &node->cache);
1370
1371         return ret;
1372 }
1373
1374 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1375                              struct walk_control *wc, int level)
1376 {
1377         struct shared_node *node;
1378         struct shared_node *dest;
1379         int ret;
1380
1381         if (level == wc->active_node)
1382                 return 0;
1383
1384         BUG_ON(wc->active_node <= level);
1385         node = find_shared_node(&wc->shared, bytenr);
1386         if (!node) {
1387                 ret = add_shared_node(&wc->shared, bytenr, refs);
1388                 BUG_ON(ret);
1389                 node = find_shared_node(&wc->shared, bytenr);
1390                 wc->nodes[level] = node;
1391                 wc->active_node = level;
1392                 return 0;
1393         }
1394
1395         if (wc->root_level == wc->active_node &&
1396             btrfs_root_refs(&root->root_item) == 0) {
1397                 if (--node->refs == 0) {
1398                         free_inode_recs_tree(&node->root_cache);
1399                         free_inode_recs_tree(&node->inode_cache);
1400                         remove_cache_extent(&wc->shared, &node->cache);
1401                         free(node);
1402                 }
1403                 return 1;
1404         }
1405
1406         dest = wc->nodes[wc->active_node];
1407         splice_shared_node(node, dest);
1408         if (node->refs == 0) {
1409                 remove_cache_extent(&wc->shared, &node->cache);
1410                 free(node);
1411         }
1412         return 1;
1413 }
1414
1415 static int leave_shared_node(struct btrfs_root *root,
1416                              struct walk_control *wc, int level)
1417 {
1418         struct shared_node *node;
1419         struct shared_node *dest;
1420         int i;
1421
1422         if (level == wc->root_level)
1423                 return 0;
1424
1425         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1426                 if (wc->nodes[i])
1427                         break;
1428         }
1429         BUG_ON(i >= BTRFS_MAX_LEVEL);
1430
1431         node = wc->nodes[wc->active_node];
1432         wc->nodes[wc->active_node] = NULL;
1433         wc->active_node = i;
1434
1435         dest = wc->nodes[wc->active_node];
1436         if (wc->active_node < wc->root_level ||
1437             btrfs_root_refs(&root->root_item) > 0) {
1438                 BUG_ON(node->refs <= 1);
1439                 splice_shared_node(node, dest);
1440         } else {
1441                 BUG_ON(node->refs < 2);
1442                 node->refs--;
1443         }
1444         return 0;
1445 }
1446
1447 /*
1448  * Returns:
1449  * < 0 - on error
1450  * 1   - if the root with id child_root_id is a child of root parent_root_id
1451  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1452  *       has other root(s) as parent(s)
1453  * 2   - if the root child_root_id doesn't have any parent roots
1454  */
1455 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1456                          u64 child_root_id)
1457 {
1458         struct btrfs_path path;
1459         struct btrfs_key key;
1460         struct extent_buffer *leaf;
1461         int has_parent = 0;
1462         int ret;
1463
1464         btrfs_init_path(&path);
1465
1466         key.objectid = parent_root_id;
1467         key.type = BTRFS_ROOT_REF_KEY;
1468         key.offset = child_root_id;
1469         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1470                                 0, 0);
1471         if (ret < 0)
1472                 return ret;
1473         btrfs_release_path(&path);
1474         if (!ret)
1475                 return 1;
1476
1477         key.objectid = child_root_id;
1478         key.type = BTRFS_ROOT_BACKREF_KEY;
1479         key.offset = 0;
1480         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1481                                 0, 0);
1482         if (ret < 0)
1483                 goto out;
1484
1485         while (1) {
1486                 leaf = path.nodes[0];
1487                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1488                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1489                         if (ret)
1490                                 break;
1491                         leaf = path.nodes[0];
1492                 }
1493
1494                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1495                 if (key.objectid != child_root_id ||
1496                     key.type != BTRFS_ROOT_BACKREF_KEY)
1497                         break;
1498
1499                 has_parent = 1;
1500
1501                 if (key.offset == parent_root_id) {
1502                         btrfs_release_path(&path);
1503                         return 1;
1504                 }
1505
1506                 path.slots[0]++;
1507         }
1508 out:
1509         btrfs_release_path(&path);
1510         if (ret < 0)
1511                 return ret;
1512         return has_parent ? 0 : 2;
1513 }
1514
1515 static int process_dir_item(struct btrfs_root *root,
1516                             struct extent_buffer *eb,
1517                             int slot, struct btrfs_key *key,
1518                             struct shared_node *active_node)
1519 {
1520         u32 total;
1521         u32 cur = 0;
1522         u32 len;
1523         u32 name_len;
1524         u32 data_len;
1525         int error;
1526         int nritems = 0;
1527         int filetype;
1528         struct btrfs_dir_item *di;
1529         struct inode_record *rec;
1530         struct cache_tree *root_cache;
1531         struct cache_tree *inode_cache;
1532         struct btrfs_key location;
1533         char namebuf[BTRFS_NAME_LEN];
1534
1535         root_cache = &active_node->root_cache;
1536         inode_cache = &active_node->inode_cache;
1537         rec = active_node->current;
1538         rec->found_dir_item = 1;
1539
1540         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1541         total = btrfs_item_size_nr(eb, slot);
1542         while (cur < total) {
1543                 nritems++;
1544                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1545                 name_len = btrfs_dir_name_len(eb, di);
1546                 data_len = btrfs_dir_data_len(eb, di);
1547                 filetype = btrfs_dir_type(eb, di);
1548
1549                 rec->found_size += name_len;
1550                 if (name_len <= BTRFS_NAME_LEN) {
1551                         len = name_len;
1552                         error = 0;
1553                 } else {
1554                         len = BTRFS_NAME_LEN;
1555                         error = REF_ERR_NAME_TOO_LONG;
1556                 }
1557                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1558
1559                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1560                         add_inode_backref(inode_cache, location.objectid,
1561                                           key->objectid, key->offset, namebuf,
1562                                           len, filetype, key->type, error);
1563                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1564                         add_inode_backref(root_cache, location.objectid,
1565                                           key->objectid, key->offset,
1566                                           namebuf, len, filetype,
1567                                           key->type, error);
1568                 } else {
1569                         fprintf(stderr, "invalid location in dir item %u\n",
1570                                 location.type);
1571                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1572                                           key->objectid, key->offset, namebuf,
1573                                           len, filetype, key->type, error);
1574                 }
1575
1576                 len = sizeof(*di) + name_len + data_len;
1577                 di = (struct btrfs_dir_item *)((char *)di + len);
1578                 cur += len;
1579         }
1580         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1581                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1582
1583         return 0;
1584 }
1585
1586 static int process_inode_ref(struct extent_buffer *eb,
1587                              int slot, struct btrfs_key *key,
1588                              struct shared_node *active_node)
1589 {
1590         u32 total;
1591         u32 cur = 0;
1592         u32 len;
1593         u32 name_len;
1594         u64 index;
1595         int error;
1596         struct cache_tree *inode_cache;
1597         struct btrfs_inode_ref *ref;
1598         char namebuf[BTRFS_NAME_LEN];
1599
1600         inode_cache = &active_node->inode_cache;
1601
1602         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1603         total = btrfs_item_size_nr(eb, slot);
1604         while (cur < total) {
1605                 name_len = btrfs_inode_ref_name_len(eb, ref);
1606                 index = btrfs_inode_ref_index(eb, ref);
1607                 if (name_len <= BTRFS_NAME_LEN) {
1608                         len = name_len;
1609                         error = 0;
1610                 } else {
1611                         len = BTRFS_NAME_LEN;
1612                         error = REF_ERR_NAME_TOO_LONG;
1613                 }
1614                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1615                 add_inode_backref(inode_cache, key->objectid, key->offset,
1616                                   index, namebuf, len, 0, key->type, error);
1617
1618                 len = sizeof(*ref) + name_len;
1619                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1620                 cur += len;
1621         }
1622         return 0;
1623 }
1624
1625 static int process_inode_extref(struct extent_buffer *eb,
1626                                 int slot, struct btrfs_key *key,
1627                                 struct shared_node *active_node)
1628 {
1629         u32 total;
1630         u32 cur = 0;
1631         u32 len;
1632         u32 name_len;
1633         u64 index;
1634         u64 parent;
1635         int error;
1636         struct cache_tree *inode_cache;
1637         struct btrfs_inode_extref *extref;
1638         char namebuf[BTRFS_NAME_LEN];
1639
1640         inode_cache = &active_node->inode_cache;
1641
1642         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1643         total = btrfs_item_size_nr(eb, slot);
1644         while (cur < total) {
1645                 name_len = btrfs_inode_extref_name_len(eb, extref);
1646                 index = btrfs_inode_extref_index(eb, extref);
1647                 parent = btrfs_inode_extref_parent(eb, extref);
1648                 if (name_len <= BTRFS_NAME_LEN) {
1649                         len = name_len;
1650                         error = 0;
1651                 } else {
1652                         len = BTRFS_NAME_LEN;
1653                         error = REF_ERR_NAME_TOO_LONG;
1654                 }
1655                 read_extent_buffer(eb, namebuf,
1656                                    (unsigned long)(extref + 1), len);
1657                 add_inode_backref(inode_cache, key->objectid, parent,
1658                                   index, namebuf, len, 0, key->type, error);
1659
1660                 len = sizeof(*extref) + name_len;
1661                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1662                 cur += len;
1663         }
1664         return 0;
1665
1666 }
1667
1668 static int count_csum_range(struct btrfs_root *root, u64 start,
1669                             u64 len, u64 *found)
1670 {
1671         struct btrfs_key key;
1672         struct btrfs_path path;
1673         struct extent_buffer *leaf;
1674         int ret;
1675         size_t size;
1676         *found = 0;
1677         u64 csum_end;
1678         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1679
1680         btrfs_init_path(&path);
1681
1682         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1683         key.offset = start;
1684         key.type = BTRFS_EXTENT_CSUM_KEY;
1685
1686         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1687                                 &key, &path, 0, 0);
1688         if (ret < 0)
1689                 goto out;
1690         if (ret > 0 && path.slots[0] > 0) {
1691                 leaf = path.nodes[0];
1692                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1693                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1694                     key.type == BTRFS_EXTENT_CSUM_KEY)
1695                         path.slots[0]--;
1696         }
1697
1698         while (len > 0) {
1699                 leaf = path.nodes[0];
1700                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1701                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1702                         if (ret > 0)
1703                                 break;
1704                         else if (ret < 0)
1705                                 goto out;
1706                         leaf = path.nodes[0];
1707                 }
1708
1709                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1710                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1711                     key.type != BTRFS_EXTENT_CSUM_KEY)
1712                         break;
1713
1714                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1715                 if (key.offset >= start + len)
1716                         break;
1717
1718                 if (key.offset > start)
1719                         start = key.offset;
1720
1721                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1722                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1723                 if (csum_end > start) {
1724                         size = min(csum_end - start, len);
1725                         len -= size;
1726                         start += size;
1727                         *found += size;
1728                 }
1729
1730                 path.slots[0]++;
1731         }
1732 out:
1733         btrfs_release_path(&path);
1734         if (ret < 0)
1735                 return ret;
1736         return 0;
1737 }
1738
1739 static int process_file_extent(struct btrfs_root *root,
1740                                 struct extent_buffer *eb,
1741                                 int slot, struct btrfs_key *key,
1742                                 struct shared_node *active_node)
1743 {
1744         struct inode_record *rec;
1745         struct btrfs_file_extent_item *fi;
1746         u64 num_bytes = 0;
1747         u64 disk_bytenr = 0;
1748         u64 extent_offset = 0;
1749         u64 mask = root->sectorsize - 1;
1750         int extent_type;
1751         int ret;
1752
1753         rec = active_node->current;
1754         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1755         rec->found_file_extent = 1;
1756
1757         if (rec->extent_start == (u64)-1) {
1758                 rec->extent_start = key->offset;
1759                 rec->extent_end = key->offset;
1760         }
1761
1762         if (rec->extent_end > key->offset)
1763                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1764         else if (rec->extent_end < key->offset) {
1765                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1766                                            key->offset - rec->extent_end);
1767                 if (ret < 0)
1768                         return ret;
1769         }
1770
1771         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1772         extent_type = btrfs_file_extent_type(eb, fi);
1773
1774         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1775                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1776                 if (num_bytes == 0)
1777                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778                 rec->found_size += num_bytes;
1779                 num_bytes = (num_bytes + mask) & ~mask;
1780         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1781                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1782                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1783                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1784                 extent_offset = btrfs_file_extent_offset(eb, fi);
1785                 if (num_bytes == 0 || (num_bytes & mask))
1786                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1787                 if (num_bytes + extent_offset >
1788                     btrfs_file_extent_ram_bytes(eb, fi))
1789                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1790                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1791                     (btrfs_file_extent_compression(eb, fi) ||
1792                      btrfs_file_extent_encryption(eb, fi) ||
1793                      btrfs_file_extent_other_encoding(eb, fi)))
1794                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1795                 if (disk_bytenr > 0)
1796                         rec->found_size += num_bytes;
1797         } else {
1798                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1799         }
1800         rec->extent_end = key->offset + num_bytes;
1801
1802         /*
1803          * The data reloc tree will copy full extents into its inode and then
1804          * copy the corresponding csums.  Because the extent it copied could be
1805          * a preallocated extent that hasn't been written to yet there may be no
1806          * csums to copy, ergo we won't have csums for our file extent.  This is
1807          * ok so just don't bother checking csums if the inode belongs to the
1808          * data reloc tree.
1809          */
1810         if (disk_bytenr > 0 &&
1811             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1812                 u64 found;
1813                 if (btrfs_file_extent_compression(eb, fi))
1814                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1815                 else
1816                         disk_bytenr += extent_offset;
1817
1818                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1819                 if (ret < 0)
1820                         return ret;
1821                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1822                         if (found > 0)
1823                                 rec->found_csum_item = 1;
1824                         if (found < num_bytes)
1825                                 rec->some_csum_missing = 1;
1826                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1827                         if (found > 0)
1828                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1829                 }
1830         }
1831         return 0;
1832 }
1833
1834 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1835                             struct walk_control *wc)
1836 {
1837         struct btrfs_key key;
1838         u32 nritems;
1839         int i;
1840         int ret = 0;
1841         struct cache_tree *inode_cache;
1842         struct shared_node *active_node;
1843
1844         if (wc->root_level == wc->active_node &&
1845             btrfs_root_refs(&root->root_item) == 0)
1846                 return 0;
1847
1848         active_node = wc->nodes[wc->active_node];
1849         inode_cache = &active_node->inode_cache;
1850         nritems = btrfs_header_nritems(eb);
1851         for (i = 0; i < nritems; i++) {
1852                 btrfs_item_key_to_cpu(eb, &key, i);
1853
1854                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1855                         continue;
1856                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1857                         continue;
1858
1859                 if (active_node->current == NULL ||
1860                     active_node->current->ino < key.objectid) {
1861                         if (active_node->current) {
1862                                 active_node->current->checked = 1;
1863                                 maybe_free_inode_rec(inode_cache,
1864                                                      active_node->current);
1865                         }
1866                         active_node->current = get_inode_rec(inode_cache,
1867                                                              key.objectid, 1);
1868                         BUG_ON(IS_ERR(active_node->current));
1869                 }
1870                 switch (key.type) {
1871                 case BTRFS_DIR_ITEM_KEY:
1872                 case BTRFS_DIR_INDEX_KEY:
1873                         ret = process_dir_item(root, eb, i, &key, active_node);
1874                         break;
1875                 case BTRFS_INODE_REF_KEY:
1876                         ret = process_inode_ref(eb, i, &key, active_node);
1877                         break;
1878                 case BTRFS_INODE_EXTREF_KEY:
1879                         ret = process_inode_extref(eb, i, &key, active_node);
1880                         break;
1881                 case BTRFS_INODE_ITEM_KEY:
1882                         ret = process_inode_item(eb, i, &key, active_node);
1883                         break;
1884                 case BTRFS_EXTENT_DATA_KEY:
1885                         ret = process_file_extent(root, eb, i, &key,
1886                                                   active_node);
1887                         break;
1888                 default:
1889                         break;
1890                 };
1891         }
1892         return ret;
1893 }
1894
1895 static void reada_walk_down(struct btrfs_root *root,
1896                             struct extent_buffer *node, int slot)
1897 {
1898         u64 bytenr;
1899         u64 ptr_gen;
1900         u32 nritems;
1901         u32 blocksize;
1902         int i;
1903         int level;
1904
1905         level = btrfs_header_level(node);
1906         if (level != 1)
1907                 return;
1908
1909         nritems = btrfs_header_nritems(node);
1910         blocksize = root->nodesize;
1911         for (i = slot; i < nritems; i++) {
1912                 bytenr = btrfs_node_blockptr(node, i);
1913                 ptr_gen = btrfs_node_ptr_generation(node, i);
1914                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1915         }
1916 }
1917
1918 /*
1919  * Check the child node/leaf by the following condition:
1920  * 1. the first item key of the node/leaf should be the same with the one
1921  *    in parent.
1922  * 2. block in parent node should match the child node/leaf.
1923  * 3. generation of parent node and child's header should be consistent.
1924  *
1925  * Or the child node/leaf pointed by the key in parent is not valid.
1926  *
1927  * We hope to check leaf owner too, but since subvol may share leaves,
1928  * which makes leaf owner check not so strong, key check should be
1929  * sufficient enough for that case.
1930  */
1931 static int check_child_node(struct btrfs_root *root,
1932                             struct extent_buffer *parent, int slot,
1933                             struct extent_buffer *child)
1934 {
1935         struct btrfs_key parent_key;
1936         struct btrfs_key child_key;
1937         int ret = 0;
1938
1939         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1940         if (btrfs_header_level(child) == 0)
1941                 btrfs_item_key_to_cpu(child, &child_key, 0);
1942         else
1943                 btrfs_node_key_to_cpu(child, &child_key, 0);
1944
1945         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1946                 ret = -EINVAL;
1947                 fprintf(stderr,
1948                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1949                         parent_key.objectid, parent_key.type, parent_key.offset,
1950                         child_key.objectid, child_key.type, child_key.offset);
1951         }
1952         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1953                 ret = -EINVAL;
1954                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1955                         btrfs_node_blockptr(parent, slot),
1956                         btrfs_header_bytenr(child));
1957         }
1958         if (btrfs_node_ptr_generation(parent, slot) !=
1959             btrfs_header_generation(child)) {
1960                 ret = -EINVAL;
1961                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1962                         btrfs_header_generation(child),
1963                         btrfs_node_ptr_generation(parent, slot));
1964         }
1965         return ret;
1966 }
1967
/*
 * Per-level cache used by walk_down_tree(): for each tree level it remembers
 * the bytenr of the last tree block whose extent info was looked up and the
 * reference count that lookup returned, so the same block is not looked up
 * again.
 */
struct node_refs {
        /* bytenr of the last looked-up tree block, indexed by level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count found for bytenr[level] */
        u64 refs[BTRFS_MAX_LEVEL];
};
1972
1973 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1974                           struct walk_control *wc, int *level,
1975                           struct node_refs *nrefs)
1976 {
1977         enum btrfs_tree_block_status status;
1978         u64 bytenr;
1979         u64 ptr_gen;
1980         struct extent_buffer *next;
1981         struct extent_buffer *cur;
1982         u32 blocksize;
1983         int ret, err = 0;
1984         u64 refs;
1985
1986         WARN_ON(*level < 0);
1987         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1988
1989         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1990                 refs = nrefs->refs[*level];
1991                 ret = 0;
1992         } else {
1993                 ret = btrfs_lookup_extent_info(NULL, root,
1994                                        path->nodes[*level]->start,
1995                                        *level, 1, &refs, NULL);
1996                 if (ret < 0) {
1997                         err = ret;
1998                         goto out;
1999                 }
2000                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2001                 nrefs->refs[*level] = refs;
2002         }
2003
2004         if (refs > 1) {
2005                 ret = enter_shared_node(root, path->nodes[*level]->start,
2006                                         refs, wc, *level);
2007                 if (ret > 0) {
2008                         err = ret;
2009                         goto out;
2010                 }
2011         }
2012
2013         while (*level >= 0) {
2014                 WARN_ON(*level < 0);
2015                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2016                 cur = path->nodes[*level];
2017
2018                 if (btrfs_header_level(cur) != *level)
2019                         WARN_ON(1);
2020
2021                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2022                         break;
2023                 if (*level == 0) {
2024                         ret = process_one_leaf(root, cur, wc);
2025                         if (ret < 0)
2026                                 err = ret;
2027                         break;
2028                 }
2029                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2030                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2031                 blocksize = root->nodesize;
2032
2033                 if (bytenr == nrefs->bytenr[*level - 1]) {
2034                         refs = nrefs->refs[*level - 1];
2035                 } else {
2036                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2037                                         *level - 1, 1, &refs, NULL);
2038                         if (ret < 0) {
2039                                 refs = 0;
2040                         } else {
2041                                 nrefs->bytenr[*level - 1] = bytenr;
2042                                 nrefs->refs[*level - 1] = refs;
2043                         }
2044                 }
2045
2046                 if (refs > 1) {
2047                         ret = enter_shared_node(root, bytenr, refs,
2048                                                 wc, *level - 1);
2049                         if (ret > 0) {
2050                                 path->slots[*level]++;
2051                                 continue;
2052                         }
2053                 }
2054
2055                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2056                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2057                         free_extent_buffer(next);
2058                         reada_walk_down(root, cur, path->slots[*level]);
2059                         next = read_tree_block(root, bytenr, blocksize,
2060                                                ptr_gen);
2061                         if (!extent_buffer_uptodate(next)) {
2062                                 struct btrfs_key node_key;
2063
2064                                 btrfs_node_key_to_cpu(path->nodes[*level],
2065                                                       &node_key,
2066                                                       path->slots[*level]);
2067                                 btrfs_add_corrupt_extent_record(root->fs_info,
2068                                                 &node_key,
2069                                                 path->nodes[*level]->start,
2070                                                 root->nodesize, *level);
2071                                 err = -EIO;
2072                                 goto out;
2073                         }
2074                 }
2075
2076                 ret = check_child_node(root, cur, path->slots[*level], next);
2077                 if (ret) {
2078                         err = ret;
2079                         goto out;
2080                 }
2081
2082                 if (btrfs_is_leaf(next))
2083                         status = btrfs_check_leaf(root, NULL, next);
2084                 else
2085                         status = btrfs_check_node(root, NULL, next);
2086                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2087                         free_extent_buffer(next);
2088                         err = -EIO;
2089                         goto out;
2090                 }
2091
2092                 *level = *level - 1;
2093                 free_extent_buffer(path->nodes[*level]);
2094                 path->nodes[*level] = next;
2095                 path->slots[*level] = 0;
2096         }
2097 out:
2098         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2099         return err;
2100 }
2101
2102 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2103                         struct walk_control *wc, int *level)
2104 {
2105         int i;
2106         struct extent_buffer *leaf;
2107
2108         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2109                 leaf = path->nodes[i];
2110                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2111                         path->slots[i]++;
2112                         *level = i;
2113                         return 0;
2114                 } else {
2115                         free_extent_buffer(path->nodes[*level]);
2116                         path->nodes[*level] = NULL;
2117                         BUG_ON(*level > wc->active_node);
2118                         if (*level == wc->active_node)
2119                                 leave_shared_node(root, wc, *level);
2120                         *level = i + 1;
2121                 }
2122         }
2123         return 1;
2124 }
2125
2126 static int check_root_dir(struct inode_record *rec)
2127 {
2128         struct inode_backref *backref;
2129         int ret = -1;
2130
2131         if (!rec->found_inode_item || rec->errors)
2132                 goto out;
2133         if (rec->nlink != 1 || rec->found_link != 0)
2134                 goto out;
2135         if (list_empty(&rec->backrefs))
2136                 goto out;
2137         backref = to_inode_backref(rec->backrefs.next);
2138         if (!backref->found_inode_ref)
2139                 goto out;
2140         if (backref->index != 0 || backref->namelen != 2 ||
2141             memcmp(backref->name, "..", 2))
2142                 goto out;
2143         if (backref->found_dir_index || backref->found_dir_item)
2144                 goto out;
2145         ret = 0;
2146 out:
2147         return ret;
2148 }
2149
2150 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2151                               struct btrfs_root *root, struct btrfs_path *path,
2152                               struct inode_record *rec)
2153 {
2154         struct btrfs_inode_item *ei;
2155         struct btrfs_key key;
2156         int ret;
2157
2158         key.objectid = rec->ino;
2159         key.type = BTRFS_INODE_ITEM_KEY;
2160         key.offset = (u64)-1;
2161
2162         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2163         if (ret < 0)
2164                 goto out;
2165         if (ret) {
2166                 if (!path->slots[0]) {
2167                         ret = -ENOENT;
2168                         goto out;
2169                 }
2170                 path->slots[0]--;
2171                 ret = 0;
2172         }
2173         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2174         if (key.objectid != rec->ino) {
2175                 ret = -ENOENT;
2176                 goto out;
2177         }
2178
2179         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2180                             struct btrfs_inode_item);
2181         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2182         btrfs_mark_buffer_dirty(path->nodes[0]);
2183         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2184         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2185                root->root_key.objectid);
2186 out:
2187         btrfs_release_path(path);
2188         return ret;
2189 }
2190
2191 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2192                                     struct btrfs_root *root,
2193                                     struct btrfs_path *path,
2194                                     struct inode_record *rec)
2195 {
2196         int ret;
2197
2198         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2199         btrfs_release_path(path);
2200         if (!ret)
2201                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2202         return ret;
2203 }
2204
2205 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2206                                struct btrfs_root *root,
2207                                struct btrfs_path *path,
2208                                struct inode_record *rec)
2209 {
2210         struct btrfs_inode_item *ei;
2211         struct btrfs_key key;
2212         int ret = 0;
2213
2214         key.objectid = rec->ino;
2215         key.type = BTRFS_INODE_ITEM_KEY;
2216         key.offset = 0;
2217
2218         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2219         if (ret) {
2220                 if (ret > 0)
2221                         ret = -ENOENT;
2222                 goto out;
2223         }
2224
2225         /* Since ret == 0, no need to check anything */
2226         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2227                             struct btrfs_inode_item);
2228         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2229         btrfs_mark_buffer_dirty(path->nodes[0]);
2230         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2231         printf("reset nbytes for ino %llu root %llu\n",
2232                rec->ino, root->root_key.objectid);
2233 out:
2234         btrfs_release_path(path);
2235         return ret;
2236 }
2237
2238 static int add_missing_dir_index(struct btrfs_root *root,
2239                                  struct cache_tree *inode_cache,
2240                                  struct inode_record *rec,
2241                                  struct inode_backref *backref)
2242 {
2243         struct btrfs_path *path;
2244         struct btrfs_trans_handle *trans;
2245         struct btrfs_dir_item *dir_item;
2246         struct extent_buffer *leaf;
2247         struct btrfs_key key;
2248         struct btrfs_disk_key disk_key;
2249         struct inode_record *dir_rec;
2250         unsigned long name_ptr;
2251         u32 data_size = sizeof(*dir_item) + backref->namelen;
2252         int ret;
2253
2254         path = btrfs_alloc_path();
2255         if (!path)
2256                 return -ENOMEM;
2257
2258         trans = btrfs_start_transaction(root, 1);
2259         if (IS_ERR(trans)) {
2260                 btrfs_free_path(path);
2261                 return PTR_ERR(trans);
2262         }
2263
2264         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2265                 (unsigned long long)rec->ino);
2266         key.objectid = backref->dir;
2267         key.type = BTRFS_DIR_INDEX_KEY;
2268         key.offset = backref->index;
2269
2270         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2271         BUG_ON(ret);
2272
2273         leaf = path->nodes[0];
2274         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2275
2276         disk_key.objectid = cpu_to_le64(rec->ino);
2277         disk_key.type = BTRFS_INODE_ITEM_KEY;
2278         disk_key.offset = 0;
2279
2280         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2281         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2282         btrfs_set_dir_data_len(leaf, dir_item, 0);
2283         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2284         name_ptr = (unsigned long)(dir_item + 1);
2285         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2286         btrfs_mark_buffer_dirty(leaf);
2287         btrfs_free_path(path);
2288         btrfs_commit_transaction(trans, root);
2289
2290         backref->found_dir_index = 1;
2291         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2292         BUG_ON(IS_ERR(dir_rec));
2293         if (!dir_rec)
2294                 return 0;
2295         dir_rec->found_size += backref->namelen;
2296         if (dir_rec->found_size == dir_rec->isize &&
2297             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2298                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2299         if (dir_rec->found_size != dir_rec->isize)
2300                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2301
2302         return 0;
2303 }
2304
2305 static int delete_dir_index(struct btrfs_root *root,
2306                             struct cache_tree *inode_cache,
2307                             struct inode_record *rec,
2308                             struct inode_backref *backref)
2309 {
2310         struct btrfs_trans_handle *trans;
2311         struct btrfs_dir_item *di;
2312         struct btrfs_path *path;
2313         int ret = 0;
2314
2315         path = btrfs_alloc_path();
2316         if (!path)
2317                 return -ENOMEM;
2318
2319         trans = btrfs_start_transaction(root, 1);
2320         if (IS_ERR(trans)) {
2321                 btrfs_free_path(path);
2322                 return PTR_ERR(trans);
2323         }
2324
2325
2326         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2327                 (unsigned long long)backref->dir,
2328                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2329                 (unsigned long long)root->objectid);
2330
2331         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2332                                     backref->name, backref->namelen,
2333                                     backref->index, -1);
2334         if (IS_ERR(di)) {
2335                 ret = PTR_ERR(di);
2336                 btrfs_free_path(path);
2337                 btrfs_commit_transaction(trans, root);
2338                 if (ret == -ENOENT)
2339                         return 0;
2340                 return ret;
2341         }
2342
2343         if (!di)
2344                 ret = btrfs_del_item(trans, root, path);
2345         else
2346                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2347         BUG_ON(ret);
2348         btrfs_free_path(path);
2349         btrfs_commit_transaction(trans, root);
2350         return ret;
2351 }
2352
2353 static int create_inode_item(struct btrfs_root *root,
2354                              struct inode_record *rec,
2355                              struct inode_backref *backref, int root_dir)
2356 {
2357         struct btrfs_trans_handle *trans;
2358         struct btrfs_inode_item inode_item;
2359         time_t now = time(NULL);
2360         int ret;
2361
2362         trans = btrfs_start_transaction(root, 1);
2363         if (IS_ERR(trans)) {
2364                 ret = PTR_ERR(trans);
2365                 return ret;
2366         }
2367
2368         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2369                 "be incomplete, please check permissions and content after "
2370                 "the fsck completes.\n", (unsigned long long)root->objectid,
2371                 (unsigned long long)rec->ino);
2372
2373         memset(&inode_item, 0, sizeof(inode_item));
2374         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2375         if (root_dir)
2376                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2377         else
2378                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2379         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2380         if (rec->found_dir_item) {
2381                 if (rec->found_file_extent)
2382                         fprintf(stderr, "root %llu inode %llu has both a dir "
2383                                 "item and extents, unsure if it is a dir or a "
2384                                 "regular file so setting it as a directory\n",
2385                                 (unsigned long long)root->objectid,
2386                                 (unsigned long long)rec->ino);
2387                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2388                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2389         } else if (!rec->found_dir_item) {
2390                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2391                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2392         }
2393         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2394         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2395         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2396         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2397         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2398         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2399         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2400         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2401
2402         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2403         BUG_ON(ret);
2404         btrfs_commit_transaction(trans, root);
2405         return 0;
2406 }
2407
2408 static int repair_inode_backrefs(struct btrfs_root *root,
2409                                  struct inode_record *rec,
2410                                  struct cache_tree *inode_cache,
2411                                  int delete)
2412 {
2413         struct inode_backref *tmp, *backref;
2414         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2415         int ret = 0;
2416         int repaired = 0;
2417
2418         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2419                 if (!delete && rec->ino == root_dirid) {
2420                         if (!rec->found_inode_item) {
2421                                 ret = create_inode_item(root, rec, backref, 1);
2422                                 if (ret)
2423                                         break;
2424                                 repaired++;
2425                         }
2426                 }
2427
2428                 /* Index 0 for root dir's are special, don't mess with it */
2429                 if (rec->ino == root_dirid && backref->index == 0)
2430                         continue;
2431
2432                 if (delete &&
2433                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2434                      (backref->found_dir_index && backref->found_inode_ref &&
2435                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2436                         ret = delete_dir_index(root, inode_cache, rec, backref);
2437                         if (ret)
2438                                 break;
2439                         repaired++;
2440                         list_del(&backref->list);
2441                         free(backref);
2442                 }
2443
2444                 if (!delete && !backref->found_dir_index &&
2445                     backref->found_dir_item && backref->found_inode_ref) {
2446                         ret = add_missing_dir_index(root, inode_cache, rec,
2447                                                     backref);
2448                         if (ret)
2449                                 break;
2450                         repaired++;
2451                         if (backref->found_dir_item &&
2452                             backref->found_dir_index &&
2453                             backref->found_dir_index) {
2454                                 if (!backref->errors &&
2455                                     backref->found_inode_ref) {
2456                                         list_del(&backref->list);
2457                                         free(backref);
2458                                 }
2459                         }
2460                 }
2461
2462                 if (!delete && (!backref->found_dir_index &&
2463                                 !backref->found_dir_item &&
2464                                 backref->found_inode_ref)) {
2465                         struct btrfs_trans_handle *trans;
2466                         struct btrfs_key location;
2467
2468                         ret = check_dir_conflict(root, backref->name,
2469                                                  backref->namelen,
2470                                                  backref->dir,
2471                                                  backref->index);
2472                         if (ret) {
2473                                 /*
2474                                  * let nlink fixing routine to handle it,
2475                                  * which can do it better.
2476                                  */
2477                                 ret = 0;
2478                                 break;
2479                         }
2480                         location.objectid = rec->ino;
2481                         location.type = BTRFS_INODE_ITEM_KEY;
2482                         location.offset = 0;
2483
2484                         trans = btrfs_start_transaction(root, 1);
2485                         if (IS_ERR(trans)) {
2486                                 ret = PTR_ERR(trans);
2487                                 break;
2488                         }
2489                         fprintf(stderr, "adding missing dir index/item pair "
2490                                 "for inode %llu\n",
2491                                 (unsigned long long)rec->ino);
2492                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2493                                                     backref->namelen,
2494                                                     backref->dir, &location,
2495                                                     imode_to_type(rec->imode),
2496                                                     backref->index);
2497                         BUG_ON(ret);
2498                         btrfs_commit_transaction(trans, root);
2499                         repaired++;
2500                 }
2501
2502                 if (!delete && (backref->found_inode_ref &&
2503                                 backref->found_dir_index &&
2504                                 backref->found_dir_item &&
2505                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2506                                 !rec->found_inode_item)) {
2507                         ret = create_inode_item(root, rec, backref, 0);
2508                         if (ret)
2509                                 break;
2510                         repaired++;
2511                 }
2512
2513         }
2514         return ret ? ret : repaired;
2515 }
2516
2517 /*
2518  * To determine the file type for nlink/inode_item repair
2519  *
2520  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2521  * Return -ENOENT if file type is not found.
2522  */
2523 static int find_file_type(struct inode_record *rec, u8 *type)
2524 {
2525         struct inode_backref *backref;
2526
2527         /* For inode item recovered case */
2528         if (rec->found_inode_item) {
2529                 *type = imode_to_type(rec->imode);
2530                 return 0;
2531         }
2532
2533         list_for_each_entry(backref, &rec->backrefs, list) {
2534                 if (backref->found_dir_index || backref->found_dir_item) {
2535                         *type = backref->filetype;
2536                         return 0;
2537                 }
2538         }
2539         return -ENOENT;
2540 }
2541
2542 /*
2543  * To determine the file name for nlink repair
2544  *
2545  * Return 0 if file name is found, set name and namelen.
2546  * Return -ENOENT if file name is not found.
2547  */
2548 static int find_file_name(struct inode_record *rec,
2549                           char *name, int *namelen)
2550 {
2551         struct inode_backref *backref;
2552
2553         list_for_each_entry(backref, &rec->backrefs, list) {
2554                 if (backref->found_dir_index || backref->found_dir_item ||
2555                     backref->found_inode_ref) {
2556                         memcpy(name, backref->name, backref->namelen);
2557                         *namelen = backref->namelen;
2558                         return 0;
2559                 }
2560         }
2561         return -ENOENT;
2562 }
2563
2564 /* Reset the nlink of the inode to the correct one */
2565 static int reset_nlink(struct btrfs_trans_handle *trans,
2566                        struct btrfs_root *root,
2567                        struct btrfs_path *path,
2568                        struct inode_record *rec)
2569 {
2570         struct inode_backref *backref;
2571         struct inode_backref *tmp;
2572         struct btrfs_key key;
2573         struct btrfs_inode_item *inode_item;
2574         int ret = 0;
2575
2576         /* We don't believe this either, reset it and iterate backref */
2577         rec->found_link = 0;
2578
2579         /* Remove all backref including the valid ones */
2580         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2581                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2582                                    backref->index, backref->name,
2583                                    backref->namelen, 0);
2584                 if (ret < 0)
2585                         goto out;
2586
2587                 /* remove invalid backref, so it won't be added back */
2588                 if (!(backref->found_dir_index &&
2589                       backref->found_dir_item &&
2590                       backref->found_inode_ref)) {
2591                         list_del(&backref->list);
2592                         free(backref);
2593                 } else {
2594                         rec->found_link++;
2595                 }
2596         }
2597
2598         /* Set nlink to 0 */
2599         key.objectid = rec->ino;
2600         key.type = BTRFS_INODE_ITEM_KEY;
2601         key.offset = 0;
2602         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2603         if (ret < 0)
2604                 goto out;
2605         if (ret > 0) {
2606                 ret = -ENOENT;
2607                 goto out;
2608         }
2609         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2610                                     struct btrfs_inode_item);
2611         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2612         btrfs_mark_buffer_dirty(path->nodes[0]);
2613         btrfs_release_path(path);
2614
2615         /*
2616          * Add back valid inode_ref/dir_item/dir_index,
2617          * add_link() will handle the nlink inc, so new nlink must be correct
2618          */
2619         list_for_each_entry(backref, &rec->backrefs, list) {
2620                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2621                                      backref->name, backref->namelen,
2622                                      backref->filetype, &backref->index, 1);
2623                 if (ret < 0)
2624                         goto out;
2625         }
2626 out:
2627         btrfs_release_path(path);
2628         return ret;
2629 }
2630
2631 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2632                                struct btrfs_root *root,
2633                                struct btrfs_path *path,
2634                                struct inode_record *rec)
2635 {
2636         char *dir_name = "lost+found";
2637         char namebuf[BTRFS_NAME_LEN] = {0};
2638         u64 lost_found_ino;
2639         u32 mode = 0700;
2640         u8 type = 0;
2641         int namelen = 0;
2642         int name_recovered = 0;
2643         int type_recovered = 0;
2644         int ret = 0;
2645
2646         /*
2647          * Get file name and type first before these invalid inode ref
2648          * are deleted by remove_all_invalid_backref()
2649          */
2650         name_recovered = !find_file_name(rec, namebuf, &namelen);
2651         type_recovered = !find_file_type(rec, &type);
2652
2653         if (!name_recovered) {
2654                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2655                        rec->ino, rec->ino);
2656                 namelen = count_digits(rec->ino);
2657                 sprintf(namebuf, "%llu", rec->ino);
2658                 name_recovered = 1;
2659         }
2660         if (!type_recovered) {
2661                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2662                        rec->ino);
2663                 type = BTRFS_FT_REG_FILE;
2664                 type_recovered = 1;
2665         }
2666
2667         ret = reset_nlink(trans, root, path, rec);
2668         if (ret < 0) {
2669                 fprintf(stderr,
2670                         "Failed to reset nlink for inode %llu: %s\n",
2671                         rec->ino, strerror(-ret));
2672                 goto out;
2673         }
2674
2675         if (rec->found_link == 0) {
2676                 lost_found_ino = root->highest_inode;
2677                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2678                         ret = -EOVERFLOW;
2679                         goto out;
2680                 }
2681                 lost_found_ino++;
2682                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2683                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2684                                   mode);
2685                 if (ret < 0) {
2686                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2687                                 dir_name, strerror(-ret));
2688                         goto out;
2689                 }
2690                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2691                                      namebuf, namelen, type, NULL, 1);
2692                 /*
2693                  * Add ".INO" suffix several times to handle case where
2694                  * "FILENAME.INO" is already taken by another file.
2695                  */
2696                 while (ret == -EEXIST) {
2697                         /*
2698                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2699                          */
2700                         if (namelen + count_digits(rec->ino) + 1 >
2701                             BTRFS_NAME_LEN) {
2702                                 ret = -EFBIG;
2703                                 goto out;
2704                         }
2705                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2706                                  ".%llu", rec->ino);
2707                         namelen += count_digits(rec->ino) + 1;
2708                         ret = btrfs_add_link(trans, root, rec->ino,
2709                                              lost_found_ino, namebuf,
2710                                              namelen, type, NULL, 1);
2711                 }
2712                 if (ret < 0) {
2713                         fprintf(stderr,
2714                                 "Failed to link the inode %llu to %s dir: %s\n",
2715                                 rec->ino, dir_name, strerror(-ret));
2716                         goto out;
2717                 }
2718                 /*
2719                  * Just increase the found_link, don't actually add the
2720                  * backref. This will make things easier and this inode
2721                  * record will be freed after the repair is done.
2722                  * So fsck will not report problem about this inode.
2723                  */
2724                 rec->found_link++;
2725                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2726                        namelen, namebuf, dir_name);
2727         }
2728         printf("Fixed the nlink of inode %llu\n", rec->ino);
2729 out:
2730         /*
2731          * Clear the flag anyway, or we will loop forever for the same inode
2732          * as it will not be removed from the bad inode list and the dead loop
2733          * happens.
2734          */
2735         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2736         btrfs_release_path(path);
2737         return ret;
2738 }
2739
2740 /*
2741  * Check if there is any normal(reg or prealloc) file extent for given
2742  * ino.
2743  * This is used to determine the file type when neither its dir_index/item or
2744  * inode_item exists.
2745  *
2746  * This will *NOT* report error, if any error happens, just consider it does
2747  * not have any normal file extent.
2748  */
2749 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2750 {
2751         struct btrfs_path *path;
2752         struct btrfs_key key;
2753         struct btrfs_key found_key;
2754         struct btrfs_file_extent_item *fi;
2755         u8 type;
2756         int ret = 0;
2757
2758         path = btrfs_alloc_path();
2759         if (!path)
2760                 goto out;
2761         key.objectid = ino;
2762         key.type = BTRFS_EXTENT_DATA_KEY;
2763         key.offset = 0;
2764
2765         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2766         if (ret < 0) {
2767                 ret = 0;
2768                 goto out;
2769         }
2770         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2771                 ret = btrfs_next_leaf(root, path);
2772                 if (ret) {
2773                         ret = 0;
2774                         goto out;
2775                 }
2776         }
2777         while (1) {
2778                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2779                                       path->slots[0]);
2780                 if (found_key.objectid != ino ||
2781                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2782                         break;
2783                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2784                                     struct btrfs_file_extent_item);
2785                 type = btrfs_file_extent_type(path->nodes[0], fi);
2786                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2787                         ret = 1;
2788                         goto out;
2789                 }
2790         }
2791 out:
2792         btrfs_free_path(path);
2793         return ret;
2794 }
2795
2796 static u32 btrfs_type_to_imode(u8 type)
2797 {
2798         static u32 imode_by_btrfs_type[] = {
2799                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2800                 [BTRFS_FT_DIR]          = S_IFDIR,
2801                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2802                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2803                 [BTRFS_FT_FIFO]         = S_IFIFO,
2804                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2805                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2806         };
2807
2808         return imode_by_btrfs_type[(type)];
2809 }
2810
2811 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2812                                 struct btrfs_root *root,
2813                                 struct btrfs_path *path,
2814                                 struct inode_record *rec)
2815 {
2816         u8 filetype;
2817         u32 mode = 0700;
2818         int type_recovered = 0;
2819         int ret = 0;
2820
2821         printf("Trying to rebuild inode:%llu\n", rec->ino);
2822
2823         type_recovered = !find_file_type(rec, &filetype);
2824
2825         /*
2826          * Try to determine inode type if type not found.
2827          *
2828          * For found regular file extent, it must be FILE.
2829          * For found dir_item/index, it must be DIR.
2830          *
2831          * For undetermined one, use FILE as fallback.
2832          *
2833          * TODO:
2834          * 1. If found backref(inode_index/item is already handled) to it,
2835          *    it must be DIR.
2836          *    Need new inode-inode ref structure to allow search for that.
2837          */
2838         if (!type_recovered) {
2839                 if (rec->found_file_extent &&
2840                     find_normal_file_extent(root, rec->ino)) {
2841                         type_recovered = 1;
2842                         filetype = BTRFS_FT_REG_FILE;
2843                 } else if (rec->found_dir_item) {
2844                         type_recovered = 1;
2845                         filetype = BTRFS_FT_DIR;
2846                 } else if (!list_empty(&rec->orphan_extents)) {
2847                         type_recovered = 1;
2848                         filetype = BTRFS_FT_REG_FILE;
2849                 } else{
2850                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2851                                rec->ino);
2852                         type_recovered = 1;
2853                         filetype = BTRFS_FT_REG_FILE;
2854                 }
2855         }
2856
2857         ret = btrfs_new_inode(trans, root, rec->ino,
2858                               mode | btrfs_type_to_imode(filetype));
2859         if (ret < 0)
2860                 goto out;
2861
2862         /*
2863          * Here inode rebuild is done, we only rebuild the inode item,
2864          * don't repair the nlink(like move to lost+found).
2865          * That is the job of nlink repair.
2866          *
2867          * We just fill the record and return
2868          */
2869         rec->found_dir_item = 1;
2870         rec->imode = mode | btrfs_type_to_imode(filetype);
2871         rec->nlink = 0;
2872         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2873         /* Ensure the inode_nlinks repair function will be called */
2874         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2875 out:
2876         return ret;
2877 }
2878
2879 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2880                                       struct btrfs_root *root,
2881                                       struct btrfs_path *path,
2882                                       struct inode_record *rec)
2883 {
2884         struct orphan_data_extent *orphan;
2885         struct orphan_data_extent *tmp;
2886         int ret = 0;
2887
2888         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2889                 /*
2890                  * Check for conflicting file extents
2891                  *
2892                  * Here we don't know whether the extents is compressed or not,
2893                  * so we can only assume it not compressed nor data offset,
2894                  * and use its disk_len as extent length.
2895                  */
2896                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2897                                        orphan->offset, orphan->disk_len, 0);
2898                 btrfs_release_path(path);
2899                 if (ret < 0)
2900                         goto out;
2901                 if (!ret) {
2902                         fprintf(stderr,
2903                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2904                                 orphan->disk_bytenr, orphan->disk_len);
2905                         ret = btrfs_free_extent(trans,
2906                                         root->fs_info->extent_root,
2907                                         orphan->disk_bytenr, orphan->disk_len,
2908                                         0, root->objectid, orphan->objectid,
2909                                         orphan->offset);
2910                         if (ret < 0)
2911                                 goto out;
2912                 }
2913                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2914                                 orphan->offset, orphan->disk_bytenr,
2915                                 orphan->disk_len, orphan->disk_len);
2916                 if (ret < 0)
2917                         goto out;
2918
2919                 /* Update file size info */
2920                 rec->found_size += orphan->disk_len;
2921                 if (rec->found_size == rec->nbytes)
2922                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2923
2924                 /* Update the file extent hole info too */
2925                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2926                                            orphan->disk_len);
2927                 if (ret < 0)
2928                         goto out;
2929                 if (RB_EMPTY_ROOT(&rec->holes))
2930                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2931
2932                 list_del(&orphan->list);
2933                 free(orphan);
2934         }
2935         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2936 out:
2937         return ret;
2938 }
2939
2940 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2941                                         struct btrfs_root *root,
2942                                         struct btrfs_path *path,
2943                                         struct inode_record *rec)
2944 {
2945         struct rb_node *node;
2946         struct file_extent_hole *hole;
2947         int found = 0;
2948         int ret = 0;
2949
2950         node = rb_first(&rec->holes);
2951
2952         while (node) {
2953                 found = 1;
2954                 hole = rb_entry(node, struct file_extent_hole, node);
2955                 ret = btrfs_punch_hole(trans, root, rec->ino,
2956                                        hole->start, hole->len);
2957                 if (ret < 0)
2958                         goto out;
2959                 ret = del_file_extent_hole(&rec->holes, hole->start,
2960                                            hole->len);
2961                 if (ret < 0)
2962                         goto out;
2963                 if (RB_EMPTY_ROOT(&rec->holes))
2964                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2965                 node = rb_first(&rec->holes);
2966         }
2967         /* special case for a file losing all its file extent */
2968         if (!found) {
2969                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2970                                        round_up(rec->isize, root->sectorsize));
2971                 if (ret < 0)
2972                         goto out;
2973         }
2974         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2975                rec->ino, root->objectid);
2976 out:
2977         return ret;
2978 }
2979
2980 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2981 {
2982         struct btrfs_trans_handle *trans;
2983         struct btrfs_path *path;
2984         int ret = 0;
2985
2986         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2987                              I_ERR_NO_ORPHAN_ITEM |
2988                              I_ERR_LINK_COUNT_WRONG |
2989                              I_ERR_NO_INODE_ITEM |
2990                              I_ERR_FILE_EXTENT_ORPHAN |
2991                              I_ERR_FILE_EXTENT_DISCOUNT|
2992                              I_ERR_FILE_NBYTES_WRONG)))
2993                 return rec->errors;
2994
2995         path = btrfs_alloc_path();
2996         if (!path)
2997                 return -ENOMEM;
2998
2999         /*
3000          * For nlink repair, it may create a dir and add link, so
3001          * 2 for parent(256)'s dir_index and dir_item
3002          * 2 for lost+found dir's inode_item and inode_ref
3003          * 1 for the new inode_ref of the file
3004          * 2 for lost+found dir's dir_index and dir_item for the file
3005          */
3006         trans = btrfs_start_transaction(root, 7);
3007         if (IS_ERR(trans)) {
3008                 btrfs_free_path(path);
3009                 return PTR_ERR(trans);
3010         }
3011
3012         if (rec->errors & I_ERR_NO_INODE_ITEM)
3013                 ret = repair_inode_no_item(trans, root, path, rec);
3014         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3015                 ret = repair_inode_orphan_extent(trans, root, path, rec);
3016         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3017                 ret = repair_inode_discount_extent(trans, root, path, rec);
3018         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3019                 ret = repair_inode_isize(trans, root, path, rec);
3020         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3021                 ret = repair_inode_orphan_item(trans, root, path, rec);
3022         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3023                 ret = repair_inode_nlinks(trans, root, path, rec);
3024         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3025                 ret = repair_inode_nbytes(trans, root, path, rec);
3026         btrfs_commit_transaction(trans, root);
3027         btrfs_free_path(path);
3028         return ret;
3029 }
3030
3031 static int check_inode_recs(struct btrfs_root *root,
3032                             struct cache_tree *inode_cache)
3033 {
3034         struct cache_extent *cache;
3035         struct ptr_node *node;
3036         struct inode_record *rec;
3037         struct inode_backref *backref;
3038         int stage = 0;
3039         int ret = 0;
3040         int err = 0;
3041         u64 error = 0;
3042         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3043
3044         if (btrfs_root_refs(&root->root_item) == 0) {
3045                 if (!cache_tree_empty(inode_cache))
3046                         fprintf(stderr, "warning line %d\n", __LINE__);
3047                 return 0;
3048         }
3049
3050         /*
3051          * We need to record the highest inode number for later 'lost+found'
3052          * dir creation.
3053          * We must select an ino not used/referred by any existing inode, or
3054          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3055          * this may cause 'lost+found' dir has wrong nlinks.
3056          */
3057         cache = last_cache_extent(inode_cache);
3058         if (cache) {
3059                 node = container_of(cache, struct ptr_node, cache);
3060                 rec = node->data;
3061                 if (rec->ino > root->highest_inode)
3062                         root->highest_inode = rec->ino;
3063         }
3064
3065         /*
3066          * We need to repair backrefs first because we could change some of the
3067          * errors in the inode recs.
3068          *
3069          * We also need to go through and delete invalid backrefs first and then
3070          * add the correct ones second.  We do this because we may get EEXIST
3071          * when adding back the correct index because we hadn't yet deleted the
3072          * invalid index.
3073          *
3074          * For example, if we were missing a dir index then the directories
3075          * isize would be wrong, so if we fixed the isize to what we thought it
3076          * would be and then fixed the backref we'd still have a invalid fs, so
3077          * we need to add back the dir index and then check to see if the isize
3078          * is still wrong.
3079          */
3080         while (stage < 3) {
3081                 stage++;
3082                 if (stage == 3 && !err)
3083                         break;
3084
3085                 cache = search_cache_extent(inode_cache, 0);
3086                 while (repair && cache) {
3087                         node = container_of(cache, struct ptr_node, cache);
3088                         rec = node->data;
3089                         cache = next_cache_extent(cache);
3090
3091                         /* Need to free everything up and rescan */
3092                         if (stage == 3) {
3093                                 remove_cache_extent(inode_cache, &node->cache);
3094                                 free(node);
3095                                 free_inode_rec(rec);
3096                                 continue;
3097                         }
3098
3099                         if (list_empty(&rec->backrefs))
3100                                 continue;
3101
3102                         ret = repair_inode_backrefs(root, rec, inode_cache,
3103                                                     stage == 1);
3104                         if (ret < 0) {
3105                                 err = ret;
3106                                 stage = 2;
3107                                 break;
3108                         } if (ret > 0) {
3109                                 err = -EAGAIN;
3110                         }
3111                 }
3112         }
3113         if (err)
3114                 return err;
3115
3116         rec = get_inode_rec(inode_cache, root_dirid, 0);
3117         BUG_ON(IS_ERR(rec));
3118         if (rec) {
3119                 ret = check_root_dir(rec);
3120                 if (ret) {
3121                         fprintf(stderr, "root %llu root dir %llu error\n",
3122                                 (unsigned long long)root->root_key.objectid,
3123                                 (unsigned long long)root_dirid);
3124                         print_inode_error(root, rec);
3125                         error++;
3126                 }
3127         } else {
3128                 if (repair) {
3129                         struct btrfs_trans_handle *trans;
3130
3131                         trans = btrfs_start_transaction(root, 1);
3132                         if (IS_ERR(trans)) {
3133                                 err = PTR_ERR(trans);
3134                                 return err;
3135                         }
3136
3137                         fprintf(stderr,
3138                                 "root %llu missing its root dir, recreating\n",
3139                                 (unsigned long long)root->objectid);
3140
3141                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3142                         BUG_ON(ret);
3143
3144                         btrfs_commit_transaction(trans, root);
3145                         return -EAGAIN;
3146                 }
3147
3148                 fprintf(stderr, "root %llu root dir %llu not found\n",
3149                         (unsigned long long)root->root_key.objectid,
3150                         (unsigned long long)root_dirid);
3151         }
3152
3153         while (1) {
3154                 cache = search_cache_extent(inode_cache, 0);
3155                 if (!cache)
3156                         break;
3157                 node = container_of(cache, struct ptr_node, cache);
3158                 rec = node->data;
3159                 remove_cache_extent(inode_cache, &node->cache);
3160                 free(node);
3161                 if (rec->ino == root_dirid ||
3162                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3163                         free_inode_rec(rec);
3164                         continue;
3165                 }
3166
3167                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3168                         ret = check_orphan_item(root, rec->ino);
3169                         if (ret == 0)
3170                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3171                         if (can_free_inode_rec(rec)) {
3172                                 free_inode_rec(rec);
3173                                 continue;
3174                         }
3175                 }
3176
3177                 if (!rec->found_inode_item)
3178                         rec->errors |= I_ERR_NO_INODE_ITEM;
3179                 if (rec->found_link != rec->nlink)
3180                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3181                 if (repair) {
3182                         ret = try_repair_inode(root, rec);
3183                         if (ret == 0 && can_free_inode_rec(rec)) {
3184                                 free_inode_rec(rec);
3185                                 continue;
3186                         }
3187                         ret = 0;
3188                 }
3189
3190                 if (!(repair && ret == 0))
3191                         error++;
3192                 print_inode_error(root, rec);
3193                 list_for_each_entry(backref, &rec->backrefs, list) {
3194                         if (!backref->found_dir_item)
3195                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3196                         if (!backref->found_dir_index)
3197                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3198                         if (!backref->found_inode_ref)
3199                                 backref->errors |= REF_ERR_NO_INODE_REF;
3200                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3201                                 " namelen %u name %s filetype %d errors %x",
3202                                 (unsigned long long)backref->dir,
3203                                 (unsigned long long)backref->index,
3204                                 backref->namelen, backref->name,
3205                                 backref->filetype, backref->errors);
3206                         print_ref_error(backref->errors);
3207                 }
3208                 free_inode_rec(rec);
3209         }
3210         return (error > 0) ? -1 : 0;
3211 }
3212
3213 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3214                                         u64 objectid)
3215 {
3216         struct cache_extent *cache;
3217         struct root_record *rec = NULL;
3218         int ret;
3219
3220         cache = lookup_cache_extent(root_cache, objectid, 1);
3221         if (cache) {
3222                 rec = container_of(cache, struct root_record, cache);
3223         } else {
3224                 rec = calloc(1, sizeof(*rec));
3225                 if (!rec)
3226                         return ERR_PTR(-ENOMEM);
3227                 rec->objectid = objectid;
3228                 INIT_LIST_HEAD(&rec->backrefs);
3229                 rec->cache.start = objectid;
3230                 rec->cache.size = 1;
3231
3232                 ret = insert_cache_extent(root_cache, &rec->cache);
3233                 if (ret)
3234                         return ERR_PTR(-EEXIST);
3235         }
3236         return rec;
3237 }
3238
3239 static struct root_backref *get_root_backref(struct root_record *rec,
3240                                              u64 ref_root, u64 dir, u64 index,
3241                                              const char *name, int namelen)
3242 {
3243         struct root_backref *backref;
3244
3245         list_for_each_entry(backref, &rec->backrefs, list) {
3246                 if (backref->ref_root != ref_root || backref->dir != dir ||
3247                     backref->namelen != namelen)
3248                         continue;
3249                 if (memcmp(name, backref->name, namelen))
3250                         continue;
3251                 return backref;
3252         }
3253
3254         backref = calloc(1, sizeof(*backref) + namelen + 1);
3255         if (!backref)
3256                 return NULL;
3257         backref->ref_root = ref_root;
3258         backref->dir = dir;
3259         backref->index = index;
3260         backref->namelen = namelen;
3261         memcpy(backref->name, name, namelen);
3262         backref->name[namelen] = '\0';
3263         list_add_tail(&backref->list, &rec->backrefs);
3264         return backref;
3265 }
3266
3267 static void free_root_record(struct cache_extent *cache)
3268 {
3269         struct root_record *rec;
3270         struct root_backref *backref;
3271
3272         rec = container_of(cache, struct root_record, cache);
3273         while (!list_empty(&rec->backrefs)) {
3274                 backref = to_root_backref(rec->backrefs.next);
3275                 list_del(&backref->list);
3276                 free(backref);
3277         }
3278
3279         kfree(rec);
3280 }
3281
3282 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3283
3284 static int add_root_backref(struct cache_tree *root_cache,
3285                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3286                             const char *name, int namelen,
3287                             int item_type, int errors)
3288 {
3289         struct root_record *rec;
3290         struct root_backref *backref;
3291
3292         rec = get_root_rec(root_cache, root_id);
3293         BUG_ON(IS_ERR(rec));
3294         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3295         BUG_ON(!backref);
3296
3297         backref->errors |= errors;
3298
3299         if (item_type != BTRFS_DIR_ITEM_KEY) {
3300                 if (backref->found_dir_index || backref->found_back_ref ||
3301                     backref->found_forward_ref) {
3302                         if (backref->index != index)
3303                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3304                 } else {
3305                         backref->index = index;
3306                 }
3307         }
3308
3309         if (item_type == BTRFS_DIR_ITEM_KEY) {
3310                 if (backref->found_forward_ref)
3311                         rec->found_ref++;
3312                 backref->found_dir_item = 1;
3313         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3314                 backref->found_dir_index = 1;
3315         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3316                 if (backref->found_forward_ref)
3317                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3318                 else if (backref->found_dir_item)
3319                         rec->found_ref++;
3320                 backref->found_forward_ref = 1;
3321         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3322                 if (backref->found_back_ref)
3323                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3324                 backref->found_back_ref = 1;
3325         } else {
3326                 BUG_ON(1);
3327         }
3328
3329         if (backref->found_forward_ref && backref->found_dir_item)
3330                 backref->reachable = 1;
3331         return 0;
3332 }
3333
3334 static int merge_root_recs(struct btrfs_root *root,
3335                            struct cache_tree *src_cache,
3336                            struct cache_tree *dst_cache)
3337 {
3338         struct cache_extent *cache;
3339         struct ptr_node *node;
3340         struct inode_record *rec;
3341         struct inode_backref *backref;
3342         int ret = 0;
3343
3344         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3345                 free_inode_recs_tree(src_cache);
3346                 return 0;
3347         }
3348
3349         while (1) {
3350                 cache = search_cache_extent(src_cache, 0);
3351                 if (!cache)
3352                         break;
3353                 node = container_of(cache, struct ptr_node, cache);
3354                 rec = node->data;
3355                 remove_cache_extent(src_cache, &node->cache);
3356                 free(node);
3357
3358                 ret = is_child_root(root, root->objectid, rec->ino);
3359                 if (ret < 0)
3360                         break;
3361                 else if (ret == 0)
3362                         goto skip;
3363
3364                 list_for_each_entry(backref, &rec->backrefs, list) {
3365                         BUG_ON(backref->found_inode_ref);
3366                         if (backref->found_dir_item)
3367                                 add_root_backref(dst_cache, rec->ino,
3368                                         root->root_key.objectid, backref->dir,
3369                                         backref->index, backref->name,
3370                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3371                                         backref->errors);
3372                         if (backref->found_dir_index)
3373                                 add_root_backref(dst_cache, rec->ino,
3374                                         root->root_key.objectid, backref->dir,
3375                                         backref->index, backref->name,
3376                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3377                                         backref->errors);
3378                 }
3379 skip:
3380                 free_inode_rec(rec);
3381         }
3382         if (ret < 0)
3383                 return ret;
3384         return 0;
3385 }
3386
3387 static int check_root_refs(struct btrfs_root *root,
3388                            struct cache_tree *root_cache)
3389 {
3390         struct root_record *rec;
3391         struct root_record *ref_root;
3392         struct root_backref *backref;
3393         struct cache_extent *cache;
3394         int loop = 1;
3395         int ret;
3396         int error;
3397         int errors = 0;
3398
3399         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3400         BUG_ON(IS_ERR(rec));
3401         rec->found_ref = 1;
3402
3403         /* fixme: this can not detect circular references */
3404         while (loop) {
3405                 loop = 0;
3406                 cache = search_cache_extent(root_cache, 0);
3407                 while (1) {
3408                         if (!cache)
3409                                 break;
3410                         rec = container_of(cache, struct root_record, cache);
3411                         cache = next_cache_extent(cache);
3412
3413                         if (rec->found_ref == 0)
3414                                 continue;
3415
3416                         list_for_each_entry(backref, &rec->backrefs, list) {
3417                                 if (!backref->reachable)
3418                                         continue;
3419
3420                                 ref_root = get_root_rec(root_cache,
3421                                                         backref->ref_root);
3422                                 BUG_ON(IS_ERR(ref_root));
3423                                 if (ref_root->found_ref > 0)
3424                                         continue;
3425
3426                                 backref->reachable = 0;
3427                                 rec->found_ref--;
3428                                 if (rec->found_ref == 0)
3429                                         loop = 1;
3430                         }
3431                 }
3432         }
3433
3434         cache = search_cache_extent(root_cache, 0);
3435         while (1) {
3436                 if (!cache)
3437                         break;
3438                 rec = container_of(cache, struct root_record, cache);
3439                 cache = next_cache_extent(cache);
3440
3441                 if (rec->found_ref == 0 &&
3442                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3443                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3444                         ret = check_orphan_item(root->fs_info->tree_root,
3445                                                 rec->objectid);
3446                         if (ret == 0)
3447                                 continue;
3448
3449                         /*
3450                          * If we don't have a root item then we likely just have
3451                          * a dir item in a snapshot for this root but no actual
3452                          * ref key or anything so it's meaningless.
3453                          */
3454                         if (!rec->found_root_item)
3455                                 continue;
3456                         errors++;
3457                         fprintf(stderr, "fs tree %llu not referenced\n",
3458                                 (unsigned long long)rec->objectid);
3459                 }
3460
3461                 error = 0;
3462                 if (rec->found_ref > 0 && !rec->found_root_item)
3463                         error = 1;
3464                 list_for_each_entry(backref, &rec->backrefs, list) {
3465                         if (!backref->found_dir_item)
3466                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3467                         if (!backref->found_dir_index)
3468                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3469                         if (!backref->found_back_ref)
3470                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3471                         if (!backref->found_forward_ref)
3472                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3473                         if (backref->reachable && backref->errors)
3474                                 error = 1;
3475                 }
3476                 if (!error)
3477                         continue;
3478
3479                 errors++;
3480                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3481                         (unsigned long long)rec->objectid, rec->found_ref,
3482                          rec->found_root_item ? "" : "not found");
3483
3484                 list_for_each_entry(backref, &rec->backrefs, list) {
3485                         if (!backref->reachable)
3486                                 continue;
3487                         if (!backref->errors && rec->found_root_item)
3488                                 continue;
3489                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3490                                 " index %llu namelen %u name %s errors %x\n",
3491                                 (unsigned long long)backref->ref_root,
3492                                 (unsigned long long)backref->dir,
3493                                 (unsigned long long)backref->index,
3494                                 backref->namelen, backref->name,
3495                                 backref->errors);
3496                         print_ref_error(backref->errors);
3497                 }
3498         }
3499         return errors > 0 ? 1 : 0;
3500 }
3501
3502 static int process_root_ref(struct extent_buffer *eb, int slot,
3503                             struct btrfs_key *key,
3504                             struct cache_tree *root_cache)
3505 {
3506         u64 dirid;
3507         u64 index;
3508         u32 len;
3509         u32 name_len;
3510         struct btrfs_root_ref *ref;
3511         char namebuf[BTRFS_NAME_LEN];
3512         int error;
3513
3514         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3515
3516         dirid = btrfs_root_ref_dirid(eb, ref);
3517         index = btrfs_root_ref_sequence(eb, ref);
3518         name_len = btrfs_root_ref_name_len(eb, ref);
3519
3520         if (name_len <= BTRFS_NAME_LEN) {
3521                 len = name_len;
3522                 error = 0;
3523         } else {
3524                 len = BTRFS_NAME_LEN;
3525                 error = REF_ERR_NAME_TOO_LONG;
3526         }
3527         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3528
3529         if (key->type == BTRFS_ROOT_REF_KEY) {
3530                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3531                                  index, namebuf, len, key->type, error);
3532         } else {
3533                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3534                                  index, namebuf, len, key->type, error);
3535         }
3536         return 0;
3537 }
3538
3539 static void free_corrupt_block(struct cache_extent *cache)
3540 {
3541         struct btrfs_corrupt_block *corrupt;
3542
3543         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3544         free(corrupt);
3545 }
3546
3547 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3548
3549 /*
3550  * Repair the btree of the given root.
3551  *
3552  * The fix is to remove the node key in corrupt_blocks cache_tree.
3553  * and rebalance the tree.
3554  * After the fix, the btree should be writeable.
3555  */
3556 static int repair_btree(struct btrfs_root *root,
3557                         struct cache_tree *corrupt_blocks)
3558 {
3559         struct btrfs_trans_handle *trans;
3560         struct btrfs_path *path;
3561         struct btrfs_corrupt_block *corrupt;
3562         struct cache_extent *cache;
3563         struct btrfs_key key;
3564         u64 offset;
3565         int level;
3566         int ret = 0;
3567
3568         if (cache_tree_empty(corrupt_blocks))
3569                 return 0;
3570
3571         path = btrfs_alloc_path();
3572         if (!path)
3573                 return -ENOMEM;
3574
3575         trans = btrfs_start_transaction(root, 1);
3576         if (IS_ERR(trans)) {
3577                 ret = PTR_ERR(trans);
3578                 fprintf(stderr, "Error starting transaction: %s\n",
3579                         strerror(-ret));
3580                 goto out_free_path;
3581         }
3582         cache = first_cache_extent(corrupt_blocks);
3583         while (cache) {
3584                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3585                                        cache);
3586                 level = corrupt->level;
3587                 path->lowest_level = level;
3588                 key.objectid = corrupt->key.objectid;
3589                 key.type = corrupt->key.type;
3590                 key.offset = corrupt->key.offset;
3591
3592                 /*
3593                  * Here we don't want to do any tree balance, since it may
3594                  * cause a balance with corrupted brother leaf/node,
3595                  * so ins_len set to 0 here.
3596                  * Balance will be done after all corrupt node/leaf is deleted.
3597                  */
3598                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3599                 if (ret < 0)
3600                         goto out;
3601                 offset = btrfs_node_blockptr(path->nodes[level],
3602                                              path->slots[level]);
3603
3604                 /* Remove the ptr */
3605                 ret = btrfs_del_ptr(trans, root, path, level,
3606                                     path->slots[level]);
3607                 if (ret < 0)
3608                         goto out;
3609                 /*
3610                  * Remove the corresponding extent
3611                  * return value is not concerned.
3612                  */
3613                 btrfs_release_path(path);
3614                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3615                                         0, root->root_key.objectid,
3616                                         level - 1, 0);
3617                 cache = next_cache_extent(cache);
3618         }
3619
3620         /* Balance the btree using btrfs_search_slot() */
3621         cache = first_cache_extent(corrupt_blocks);
3622         while (cache) {
3623                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3624                                        cache);
3625                 memcpy(&key, &corrupt->key, sizeof(key));
3626                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3627                 if (ret < 0)
3628                         goto out;
3629                 /* return will always >0 since it won't find the item */
3630                 ret = 0;
3631                 btrfs_release_path(path);
3632                 cache = next_cache_extent(cache);
3633         }
3634 out:
3635         btrfs_commit_transaction(trans, root);
3636 out_free_path:
3637         btrfs_free_path(path);
3638         return ret;
3639 }
3640
3641 static int check_fs_root(struct btrfs_root *root,
3642                          struct cache_tree *root_cache,
3643                          struct walk_control *wc)
3644 {
3645         int ret = 0;
3646         int err = 0;
3647         int wret;
3648         int level;
3649         struct btrfs_path path;
3650         struct shared_node root_node;
3651         struct root_record *rec;
3652         struct btrfs_root_item *root_item = &root->root_item;
3653         struct cache_tree corrupt_blocks;
3654         struct orphan_data_extent *orphan;
3655         struct orphan_data_extent *tmp;
3656         enum btrfs_tree_block_status status;
3657         struct node_refs nrefs;
3658
3659         /*
3660          * Reuse the corrupt_block cache tree to record corrupted tree block
3661          *
3662          * Unlike the usage in extent tree check, here we do it in a per
3663          * fs/subvol tree base.
3664          */
3665         cache_tree_init(&corrupt_blocks);
3666         root->fs_info->corrupt_blocks = &corrupt_blocks;
3667
3668         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3669                 rec = get_root_rec(root_cache, root->root_key.objectid);
3670                 BUG_ON(IS_ERR(rec));
3671                 if (btrfs_root_refs(root_item) > 0)
3672                         rec->found_root_item = 1;
3673         }
3674
3675         btrfs_init_path(&path);
3676         memset(&root_node, 0, sizeof(root_node));
3677         cache_tree_init(&root_node.root_cache);
3678         cache_tree_init(&root_node.inode_cache);
3679         memset(&nrefs, 0, sizeof(nrefs));
3680
3681         /* Move the orphan extent record to corresponding inode_record */
3682         list_for_each_entry_safe(orphan, tmp,
3683                                  &root->orphan_data_extents, list) {
3684                 struct inode_record *inode;
3685
3686                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3687                                       1);
3688                 BUG_ON(IS_ERR(inode));
3689                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3690                 list_move(&orphan->list, &inode->orphan_extents);
3691         }
3692
3693         level = btrfs_header_level(root->node);
3694         memset(wc->nodes, 0, sizeof(wc->nodes));
3695         wc->nodes[level] = &root_node;
3696         wc->active_node = level;
3697         wc->root_level = level;
3698
3699         /* We may not have checked the root block, lets do that now */
3700         if (btrfs_is_leaf(root->node))
3701                 status = btrfs_check_leaf(root, NULL, root->node);
3702         else
3703                 status = btrfs_check_node(root, NULL, root->node);
3704         if (status != BTRFS_TREE_BLOCK_CLEAN)
3705                 return -EIO;
3706
3707         if (btrfs_root_refs(root_item) > 0 ||
3708             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3709                 path.nodes[level] = root->node;
3710                 extent_buffer_get(root->node);
3711                 path.slots[level] = 0;
3712         } else {
3713                 struct btrfs_key key;
3714                 struct btrfs_disk_key found_key;
3715
3716                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3717                 level = root_item->drop_level;
3718                 path.lowest_level = level;
3719                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3720                 if (wret < 0)
3721                         goto skip_walking;
3722                 btrfs_node_key(path.nodes[level], &found_key,
3723                                 path.slots[level]);
3724                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3725                                         sizeof(found_key)));
3726         }
3727
3728         while (1) {
3729                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3730                 if (wret < 0)
3731                         ret = wret;
3732                 if (wret != 0)
3733                         break;
3734
3735                 wret = walk_up_tree(root, &path, wc, &level);
3736                 if (wret < 0)
3737                         ret = wret;
3738                 if (wret != 0)
3739                         break;
3740         }
3741 skip_walking:
3742         btrfs_release_path(&path);
3743
3744         if (!cache_tree_empty(&corrupt_blocks)) {
3745                 struct cache_extent *cache;
3746                 struct btrfs_corrupt_block *corrupt;
3747
3748                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3749                        root->root_key.objectid);
3750                 cache = first_cache_extent(&corrupt_blocks);
3751                 while (cache) {
3752                         corrupt = container_of(cache,
3753                                                struct btrfs_corrupt_block,
3754                                                cache);
3755                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3756                                cache->start, corrupt->level,
3757                                corrupt->key.objectid, corrupt->key.type,
3758                                corrupt->key.offset);
3759                         cache = next_cache_extent(cache);
3760                 }
3761                 if (repair) {
3762                         printf("Try to repair the btree for root %llu\n",
3763                                root->root_key.objectid);
3764                         ret = repair_btree(root, &corrupt_blocks);
3765                         if (ret < 0)
3766                                 fprintf(stderr, "Failed to repair btree: %s\n",
3767                                         strerror(-ret));
3768                         if (!ret)
3769                                 printf("Btree for root %llu is fixed\n",
3770                                        root->root_key.objectid);
3771                 }
3772         }
3773
3774         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3775         if (err < 0)
3776                 ret = err;
3777
3778         if (root_node.current) {
3779                 root_node.current->checked = 1;
3780                 maybe_free_inode_rec(&root_node.inode_cache,
3781                                 root_node.current);
3782         }
3783
3784         err = check_inode_recs(root, &root_node.inode_cache);
3785         if (!ret)
3786                 ret = err;
3787
3788         free_corrupt_blocks_tree(&corrupt_blocks);
3789         root->fs_info->corrupt_blocks = NULL;
3790         free_orphan_data_extents(&root->orphan_data_extents);
3791         return ret;
3792 }
3793
3794 static int fs_root_objectid(u64 objectid)
3795 {
3796         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3797             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3798                 return 1;
3799         return is_fstree(objectid);
3800 }
3801
3802 static int check_fs_roots(struct btrfs_root *root,
3803                           struct cache_tree *root_cache)
3804 {
3805         struct btrfs_path path;
3806         struct btrfs_key key;
3807         struct walk_control wc;
3808         struct extent_buffer *leaf, *tree_node;
3809         struct btrfs_root *tmp_root;
3810         struct btrfs_root *tree_root = root->fs_info->tree_root;
3811         int ret;
3812         int err = 0;
3813
3814         if (ctx.progress_enabled) {
3815                 ctx.tp = TASK_FS_ROOTS;
3816                 task_start(ctx.info);
3817         }
3818
3819         /*
3820          * Just in case we made any changes to the extent tree that weren't
3821          * reflected into the free space cache yet.
3822          */
3823         if (repair)
3824                 reset_cached_block_groups(root->fs_info);
3825         memset(&wc, 0, sizeof(wc));
3826         cache_tree_init(&wc.shared);
3827         btrfs_init_path(&path);
3828
3829 again:
3830         key.offset = 0;
3831         key.objectid = 0;
3832         key.type = BTRFS_ROOT_ITEM_KEY;
3833         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3834         if (ret < 0) {
3835                 err = 1;
3836                 goto out;
3837         }
3838         tree_node = tree_root->node;
3839         while (1) {
3840                 if (tree_node != tree_root->node) {
3841                         free_root_recs_tree(root_cache);
3842                         btrfs_release_path(&path);
3843                         goto again;
3844                 }
3845                 leaf = path.nodes[0];
3846                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3847                         ret = btrfs_next_leaf(tree_root, &path);
3848                         if (ret) {
3849                                 if (ret < 0)
3850                                         err = 1;
3851                                 break;
3852                         }
3853                         leaf = path.nodes[0];
3854                 }
3855                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3856                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3857                     fs_root_objectid(key.objectid)) {
3858                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3859                                 tmp_root = btrfs_read_fs_root_no_cache(
3860                                                 root->fs_info, &key);
3861                         } else {
3862                                 key.offset = (u64)-1;
3863                                 tmp_root = btrfs_read_fs_root(
3864                                                 root->fs_info, &key);
3865                         }
3866                         if (IS_ERR(tmp_root)) {
3867                                 err = 1;
3868                                 goto next;
3869                         }
3870                         ret = check_fs_root(tmp_root, root_cache, &wc);
3871                         if (ret == -EAGAIN) {
3872                                 free_root_recs_tree(root_cache);
3873                                 btrfs_release_path(&path);
3874                                 goto again;
3875                         }
3876                         if (ret)
3877                                 err = 1;
3878                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3879                                 btrfs_free_fs_root(tmp_root);
3880                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3881                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3882                         process_root_ref(leaf, path.slots[0], &key,
3883                                          root_cache);
3884                 }
3885 next:
3886                 path.slots[0]++;
3887         }
3888 out:
3889         btrfs_release_path(&path);
3890         if (err)
3891                 free_extent_cache_tree(&wc.shared);
3892         if (!cache_tree_empty(&wc.shared))
3893                 fprintf(stderr, "warning line %d\n", __LINE__);
3894
3895         task_stop(ctx.info);
3896
3897         return err;
3898 }
3899
3900 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3901 {
3902         struct rb_node *n;
3903         struct extent_backref *back;
3904         struct tree_backref *tback;
3905         struct data_backref *dback;
3906         u64 found = 0;
3907         int err = 0;
3908
3909         for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3910                 back = rb_node_to_extent_backref(n);
3911                 if (!back->found_extent_tree) {
3912                         err = 1;
3913                         if (!print_errs)
3914                                 goto out;
3915                         if (back->is_data) {
3916                                 dback = to_data_backref(back);
3917                                 fprintf(stderr, "Backref %llu %s %llu"
3918                                         " owner %llu offset %llu num_refs %lu"
3919                                         " not found in extent tree\n",
3920                                         (unsigned long long)rec->start,
3921                                         back->full_backref ?
3922                                         "parent" : "root",
3923                                         back->full_backref ?
3924                                         (unsigned long long)dback->parent:
3925                                         (unsigned long long)dback->root,
3926                                         (unsigned long long)dback->owner,
3927                                         (unsigned long long)dback->offset,
3928                                         (unsigned long)dback->num_refs);
3929                         } else {
3930                                 tback = to_tree_backref(back);
3931                                 fprintf(stderr, "Backref %llu parent %llu"
3932                                         " root %llu not found in extent tree\n",
3933                                         (unsigned long long)rec->start,
3934                                         (unsigned long long)tback->parent,
3935                                         (unsigned long long)tback->root);
3936                         }
3937                 }
3938                 if (!back->is_data && !back->found_ref) {
3939                         err = 1;
3940                         if (!print_errs)
3941                                 goto out;
3942                         tback = to_tree_backref(back);
3943                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3944                                 (unsigned long long)rec->start,
3945                                 back->full_backref ? "parent" : "root",
3946                                 back->full_backref ?
3947                                 (unsigned long long)tback->parent :
3948                                 (unsigned long long)tback->root, back);
3949                 }
3950                 if (back->is_data) {
3951                         dback = to_data_backref(back);
3952                         if (dback->found_ref != dback->num_refs) {
3953                                 err = 1;
3954                                 if (!print_errs)
3955                                         goto out;
3956                                 fprintf(stderr, "Incorrect local backref count"
3957                                         " on %llu %s %llu owner %llu"
3958                                         " offset %llu found %u wanted %u back %p\n",
3959                                         (unsigned long long)rec->start,
3960                                         back->full_backref ?
3961                                         "parent" : "root",
3962                                         back->full_backref ?
3963                                         (unsigned long long)dback->parent:
3964                                         (unsigned long long)dback->root,
3965                                         (unsigned long long)dback->owner,
3966                                         (unsigned long long)dback->offset,
3967                                         dback->found_ref, dback->num_refs, back);
3968                         }
3969                         if (dback->disk_bytenr != rec->start) {
3970                                 err = 1;
3971                                 if (!print_errs)
3972                                         goto out;
3973                                 fprintf(stderr, "Backref disk bytenr does not"
3974                                         " match extent record, bytenr=%llu, "
3975                                         "ref bytenr=%llu\n",
3976                                         (unsigned long long)rec->start,
3977                                         (unsigned long long)dback->disk_bytenr);
3978                         }
3979
3980                         if (dback->bytes != rec->nr) {
3981                                 err = 1;
3982                                 if (!print_errs)
3983                                         goto out;
3984                                 fprintf(stderr, "Backref bytes do not match "
3985                                         "extent backref, bytenr=%llu, ref "
3986                                         "bytes=%llu, backref bytes=%llu\n",
3987                                         (unsigned long long)rec->start,
3988                                         (unsigned long long)rec->nr,
3989                                         (unsigned long long)dback->bytes);
3990                         }
3991                 }
3992                 if (!back->is_data) {
3993                         found += 1;
3994                 } else {
3995                         dback = to_data_backref(back);
3996                         found += dback->found_ref;
3997                 }
3998         }
3999         if (found != rec->refs) {
4000                 err = 1;
4001                 if (!print_errs)
4002                         goto out;
4003                 fprintf(stderr, "Incorrect global backref count "
4004                         "on %llu found %llu wanted %llu\n",
4005                         (unsigned long long)rec->start,
4006                         (unsigned long long)found,
4007                         (unsigned long long)rec->refs);
4008         }
4009 out:
4010         return err;
4011 }
4012
/*
 * rb_free_nodes() callback: release the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
4019
4020 static void free_all_extent_backrefs(struct extent_record *rec)
4021 {
4022         rb_free_nodes(&rec->backref_tree, __free_one_backref);
4023 }
4024
4025 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4026                                      struct cache_tree *extent_cache)
4027 {
4028         struct cache_extent *cache;
4029         struct extent_record *rec;
4030
4031         while (1) {
4032                 cache = first_cache_extent(extent_cache);
4033                 if (!cache)
4034                         break;
4035                 rec = container_of(cache, struct extent_record, cache);
4036                 remove_cache_extent(extent_cache, cache);
4037                 free_all_extent_backrefs(rec);
4038                 free(rec);
4039         }
4040 }
4041
4042 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4043                                  struct extent_record *rec)
4044 {
4045         if (rec->content_checked && rec->owner_ref_checked &&
4046             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4047             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4048             !rec->bad_full_backref && !rec->crossing_stripes &&
4049             !rec->wrong_chunk_type) {
4050                 remove_cache_extent(extent_cache, &rec->cache);
4051                 free_all_extent_backrefs(rec);
4052                 list_del_init(&rec->list);
4053                 free(rec);
4054         }
4055         return 0;
4056 }
4057
4058 static int check_owner_ref(struct btrfs_root *root,
4059                             struct extent_record *rec,
4060                             struct extent_buffer *buf)
4061 {
4062         struct extent_backref *node, *tmp;
4063         struct tree_backref *back;
4064         struct btrfs_root *ref_root;
4065         struct btrfs_key key;
4066         struct btrfs_path path;
4067         struct extent_buffer *parent;
4068         int level;
4069         int found = 0;
4070         int ret;
4071
4072         rbtree_postorder_for_each_entry_safe(node, tmp,
4073                                              &rec->backref_tree, node) {
4074                 if (node->is_data)
4075                         continue;
4076                 if (!node->found_ref)
4077                         continue;
4078                 if (node->full_backref)
4079                         continue;
4080                 back = to_tree_backref(node);
4081                 if (btrfs_header_owner(buf) == back->root)
4082                         return 0;
4083         }
4084         BUG_ON(rec->is_root);
4085
4086         /* try to find the block by search corresponding fs tree */
4087         key.objectid = btrfs_header_owner(buf);
4088         key.type = BTRFS_ROOT_ITEM_KEY;
4089         key.offset = (u64)-1;
4090
4091         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4092         if (IS_ERR(ref_root))
4093                 return 1;
4094
4095         level = btrfs_header_level(buf);
4096         if (level == 0)
4097                 btrfs_item_key_to_cpu(buf, &key, 0);
4098         else
4099                 btrfs_node_key_to_cpu(buf, &key, 0);
4100
4101         btrfs_init_path(&path);
4102         path.lowest_level = level + 1;
4103         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4104         if (ret < 0)
4105                 return 0;
4106
4107         parent = path.nodes[level + 1];
4108         if (parent && buf->start == btrfs_node_blockptr(parent,
4109                                                         path.slots[level + 1]))
4110                 found = 1;
4111
4112         btrfs_release_path(&path);
4113         return found ? 0 : 1;
4114 }
4115
4116 static int is_extent_tree_record(struct extent_record *rec)
4117 {
4118         struct extent_backref *ref, *tmp;
4119         struct tree_backref *back;
4120         int is_extent = 0;
4121
4122         rbtree_postorder_for_each_entry_safe(ref, tmp,
4123                                              &rec->backref_tree, node) {
4124                 if (ref->is_data)
4125                         return 0;
4126                 back = to_tree_backref(ref);
4127                 if (ref->full_backref)
4128                         return 0;
4129                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4130                         is_extent = 1;
4131         }
4132         return is_extent;
4133 }
4134
4135
4136 static int record_bad_block_io(struct btrfs_fs_info *info,
4137                                struct cache_tree *extent_cache,
4138                                u64 start, u64 len)
4139 {
4140         struct extent_record *rec;
4141         struct cache_extent *cache;
4142         struct btrfs_key key;
4143
4144         cache = lookup_cache_extent(extent_cache, start, len);
4145         if (!cache)
4146                 return 0;
4147
4148         rec = container_of(cache, struct extent_record, cache);
4149         if (!is_extent_tree_record(rec))
4150                 return 0;
4151
4152         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4153         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4154 }
4155
4156 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4157                        struct extent_buffer *buf, int slot)
4158 {
4159         if (btrfs_header_level(buf)) {
4160                 struct btrfs_key_ptr ptr1, ptr2;
4161
4162                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4163                                    sizeof(struct btrfs_key_ptr));
4164                 read_extent_buffer(buf, &ptr2,
4165                                    btrfs_node_key_ptr_offset(slot + 1),
4166                                    sizeof(struct btrfs_key_ptr));
4167                 write_extent_buffer(buf, &ptr1,
4168                                     btrfs_node_key_ptr_offset(slot + 1),
4169                                     sizeof(struct btrfs_key_ptr));
4170                 write_extent_buffer(buf, &ptr2,
4171                                     btrfs_node_key_ptr_offset(slot),
4172                                     sizeof(struct btrfs_key_ptr));
4173                 if (slot == 0) {
4174                         struct btrfs_disk_key key;
4175                         btrfs_node_key(buf, &key, 0);
4176                         btrfs_fixup_low_keys(root, path, &key,
4177                                              btrfs_header_level(buf) + 1);
4178                 }
4179         } else {
4180                 struct btrfs_item *item1, *item2;
4181                 struct btrfs_key k1, k2;
4182                 char *item1_data, *item2_data;
4183                 u32 item1_offset, item2_offset, item1_size, item2_size;
4184
4185                 item1 = btrfs_item_nr(slot);
4186                 item2 = btrfs_item_nr(slot + 1);
4187                 btrfs_item_key_to_cpu(buf, &k1, slot);
4188                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4189                 item1_offset = btrfs_item_offset(buf, item1);
4190                 item2_offset = btrfs_item_offset(buf, item2);
4191                 item1_size = btrfs_item_size(buf, item1);
4192                 item2_size = btrfs_item_size(buf, item2);
4193
4194                 item1_data = malloc(item1_size);
4195                 if (!item1_data)
4196                         return -ENOMEM;
4197                 item2_data = malloc(item2_size);
4198                 if (!item2_data) {
4199                         free(item1_data);
4200                         return -ENOMEM;
4201                 }
4202
4203                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4204                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4205
4206                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4207                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4208                 free(item1_data);
4209                 free(item2_data);
4210
4211                 btrfs_set_item_offset(buf, item1, item2_offset);
4212                 btrfs_set_item_offset(buf, item2, item1_offset);
4213                 btrfs_set_item_size(buf, item1, item2_size);
4214                 btrfs_set_item_size(buf, item2, item1_size);
4215
4216                 path->slots[0] = slot;
4217                 btrfs_set_item_key_unsafe(root, path, &k2);
4218                 path->slots[0] = slot + 1;
4219                 btrfs_set_item_key_unsafe(root, path, &k1);
4220         }
4221         return 0;
4222 }
4223
4224 static int fix_key_order(struct btrfs_trans_handle *trans,
4225                          struct btrfs_root *root,
4226                          struct btrfs_path *path)
4227 {
4228         struct extent_buffer *buf;
4229         struct btrfs_key k1, k2;
4230         int i;
4231         int level = path->lowest_level;
4232         int ret = -EIO;
4233
4234         buf = path->nodes[level];
4235         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4236                 if (level) {
4237                         btrfs_node_key_to_cpu(buf, &k1, i);
4238                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4239                 } else {
4240                         btrfs_item_key_to_cpu(buf, &k1, i);
4241                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4242                 }
4243                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4244                         continue;
4245                 ret = swap_values(root, path, buf, i);
4246                 if (ret)
4247                         break;
4248                 btrfs_mark_buffer_dirty(buf);
4249                 i = 0;
4250         }
4251         return ret;
4252 }
4253
4254 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4255                              struct btrfs_root *root,
4256                              struct btrfs_path *path,
4257                              struct extent_buffer *buf, int slot)
4258 {
4259         struct btrfs_key key;
4260         int nritems = btrfs_header_nritems(buf);
4261
4262         btrfs_item_key_to_cpu(buf, &key, slot);
4263
4264         /* These are all the keys we can deal with missing. */
4265         if (key.type != BTRFS_DIR_INDEX_KEY &&
4266             key.type != BTRFS_EXTENT_ITEM_KEY &&
4267             key.type != BTRFS_METADATA_ITEM_KEY &&
4268             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4269             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4270                 return -1;
4271
4272         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4273                (unsigned long long)key.objectid, key.type,
4274                (unsigned long long)key.offset, slot, buf->start);
4275         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4276                               btrfs_item_nr_offset(slot + 1),
4277                               sizeof(struct btrfs_item) *
4278                               (nritems - slot - 1));
4279         btrfs_set_header_nritems(buf, nritems - 1);
4280         if (slot == 0) {
4281                 struct btrfs_disk_key disk_key;
4282
4283                 btrfs_item_key(buf, &disk_key, 0);
4284                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4285         }
4286         btrfs_mark_buffer_dirty(buf);
4287         return 0;
4288 }
4289
4290 static int fix_item_offset(struct btrfs_trans_handle *trans,
4291                            struct btrfs_root *root,
4292                            struct btrfs_path *path)
4293 {
4294         struct extent_buffer *buf;
4295         int i;
4296         int ret = 0;
4297
4298         /* We should only get this for leaves */
4299         BUG_ON(path->lowest_level);
4300         buf = path->nodes[0];
4301 again:
4302         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4303                 unsigned int shift = 0, offset;
4304
4305                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4306                     BTRFS_LEAF_DATA_SIZE(root)) {
4307                         if (btrfs_item_end_nr(buf, i) >
4308                             BTRFS_LEAF_DATA_SIZE(root)) {
4309                                 ret = delete_bogus_item(trans, root, path,
4310                                                         buf, i);
4311                                 if (!ret)
4312                                         goto again;
4313                                 fprintf(stderr, "item is off the end of the "
4314                                         "leaf, can't fix\n");
4315                                 ret = -EIO;
4316                                 break;
4317                         }
4318                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4319                                 btrfs_item_end_nr(buf, i);
4320                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4321                            btrfs_item_offset_nr(buf, i - 1)) {
4322                         if (btrfs_item_end_nr(buf, i) >
4323                             btrfs_item_offset_nr(buf, i - 1)) {
4324                                 ret = delete_bogus_item(trans, root, path,
4325                                                         buf, i);
4326                                 if (!ret)
4327                                         goto again;
4328                                 fprintf(stderr, "items overlap, can't fix\n");
4329                                 ret = -EIO;
4330                                 break;
4331                         }
4332                         shift = btrfs_item_offset_nr(buf, i - 1) -
4333                                 btrfs_item_end_nr(buf, i);
4334                 }
4335                 if (!shift)
4336                         continue;
4337
4338                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4339                        i, shift, (unsigned long long)buf->start);
4340                 offset = btrfs_item_offset_nr(buf, i);
4341                 memmove_extent_buffer(buf,
4342                                       btrfs_leaf_data(buf) + offset + shift,
4343                                       btrfs_leaf_data(buf) + offset,
4344                                       btrfs_item_size_nr(buf, i));
4345                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4346                                       offset + shift);
4347                 btrfs_mark_buffer_dirty(buf);
4348         }
4349
4350         /*
4351          * We may have moved things, in which case we want to exit so we don't
4352          * write those changes out.  Once we have proper abort functionality in
4353          * progs this can be changed to something nicer.
4354          */
4355         BUG_ON(ret);
4356         return ret;
4357 }
4358
4359 /*
4360  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4361  * then just return -EIO.
4362  */
4363 static int try_to_fix_bad_block(struct btrfs_root *root,
4364                                 struct extent_buffer *buf,
4365                                 enum btrfs_tree_block_status status)
4366 {
4367         struct btrfs_trans_handle *trans;
4368         struct ulist *roots;
4369         struct ulist_node *node;
4370         struct btrfs_root *search_root;
4371         struct btrfs_path *path;
4372         struct ulist_iterator iter;
4373         struct btrfs_key root_key, key;
4374         int ret;
4375
4376         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4377             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4378                 return -EIO;
4379
4380         path = btrfs_alloc_path();
4381         if (!path)
4382                 return -EIO;
4383
4384         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4385                                    0, &roots);
4386         if (ret) {
4387                 btrfs_free_path(path);
4388                 return -EIO;
4389         }
4390
4391         ULIST_ITER_INIT(&iter);
4392         while ((node = ulist_next(roots, &iter))) {
4393                 root_key.objectid = node->val;
4394                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4395                 root_key.offset = (u64)-1;
4396
4397                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4398                 if (IS_ERR(root)) {
4399                         ret = -EIO;
4400                         break;
4401                 }
4402
4403
4404                 trans = btrfs_start_transaction(search_root, 0);
4405                 if (IS_ERR(trans)) {
4406                         ret = PTR_ERR(trans);
4407                         break;
4408                 }
4409
4410                 path->lowest_level = btrfs_header_level(buf);
4411                 path->skip_check_block = 1;
4412                 if (path->lowest_level)
4413                         btrfs_node_key_to_cpu(buf, &key, 0);
4414                 else
4415                         btrfs_item_key_to_cpu(buf, &key, 0);
4416                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4417                 if (ret) {
4418                         ret = -EIO;
4419                         btrfs_commit_transaction(trans, search_root);
4420                         break;
4421                 }
4422                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4423                         ret = fix_key_order(trans, search_root, path);
4424                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4425                         ret = fix_item_offset(trans, search_root, path);
4426                 if (ret) {
4427                         btrfs_commit_transaction(trans, search_root);
4428                         break;
4429                 }
4430                 btrfs_release_path(path);
4431                 btrfs_commit_transaction(trans, search_root);
4432         }
4433         ulist_free(roots);
4434         btrfs_free_path(path);
4435         return ret;
4436 }
4437
4438 static int check_block(struct btrfs_root *root,
4439                        struct cache_tree *extent_cache,
4440                        struct extent_buffer *buf, u64 flags)
4441 {
4442         struct extent_record *rec;
4443         struct cache_extent *cache;
4444         struct btrfs_key key;
4445         enum btrfs_tree_block_status status;
4446         int ret = 0;
4447         int level;
4448
4449         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4450         if (!cache)
4451                 return 1;
4452         rec = container_of(cache, struct extent_record, cache);
4453         rec->generation = btrfs_header_generation(buf);
4454
4455         level = btrfs_header_level(buf);
4456         if (btrfs_header_nritems(buf) > 0) {
4457
4458                 if (level == 0)
4459                         btrfs_item_key_to_cpu(buf, &key, 0);
4460                 else
4461                         btrfs_node_key_to_cpu(buf, &key, 0);
4462
4463                 rec->info_objectid = key.objectid;
4464         }
4465         rec->info_level = level;
4466
4467         if (btrfs_is_leaf(buf))
4468                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4469         else
4470                 status = btrfs_check_node(root, &rec->parent_key, buf);
4471
4472         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4473                 if (repair)
4474                         status = try_to_fix_bad_block(root, buf, status);
4475                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4476                         ret = -EIO;
4477                         fprintf(stderr, "bad block %llu\n",
4478                                 (unsigned long long)buf->start);
4479                 } else {
4480                         /*
4481                          * Signal to callers we need to start the scan over
4482                          * again since we'll have cowed blocks.
4483                          */
4484                         ret = -EAGAIN;
4485                 }
4486         } else {
4487                 rec->content_checked = 1;
4488                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4489                         rec->owner_ref_checked = 1;
4490                 else {
4491                         ret = check_owner_ref(root, rec, buf);
4492                         if (!ret)
4493                                 rec->owner_ref_checked = 1;
4494                 }
4495         }
4496         if (!ret)
4497                 maybe_free_extent_rec(extent_cache, rec);
4498         return ret;
4499 }
4500
4501
4502 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4503                                                 u64 parent, u64 root)
4504 {
4505         struct rb_node *node;
4506         struct tree_backref *back = NULL;
4507         struct tree_backref match = {
4508                 .node = {
4509                         .is_data = 0,
4510                 },
4511         };
4512
4513         if (parent) {
4514                 match.parent = parent;
4515                 match.node.full_backref = 1;
4516         } else {
4517                 match.root = root;
4518         }
4519
4520         node = rb_search(&rec->backref_tree, &match.node.node,
4521                          (rb_compare_keys)compare_extent_backref, NULL);
4522         if (node)
4523                 back = to_tree_backref(rb_node_to_extent_backref(node));
4524
4525         return back;
4526 }
4527
4528 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4529                                                 u64 parent, u64 root)
4530 {
4531         struct tree_backref *ref = malloc(sizeof(*ref));
4532
4533         if (!ref)
4534                 return NULL;
4535         memset(&ref->node, 0, sizeof(ref->node));
4536         if (parent > 0) {
4537                 ref->parent = parent;
4538                 ref->node.full_backref = 1;
4539         } else {
4540                 ref->root = root;
4541                 ref->node.full_backref = 0;
4542         }
4543         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4544
4545         return ref;
4546 }
4547
4548 static struct data_backref *find_data_backref(struct extent_record *rec,
4549                                                 u64 parent, u64 root,
4550                                                 u64 owner, u64 offset,
4551                                                 int found_ref,
4552                                                 u64 disk_bytenr, u64 bytes)
4553 {
4554         struct rb_node *node;
4555         struct data_backref *back = NULL;
4556         struct data_backref match = {
4557                 .node = {
4558                         .is_data = 1,
4559                 },
4560                 .owner = owner,
4561                 .offset = offset,
4562                 .bytes = bytes,
4563                 .found_ref = found_ref,
4564                 .disk_bytenr = disk_bytenr,
4565         };
4566
4567         if (parent) {
4568                 match.parent = parent;
4569                 match.node.full_backref = 1;
4570         } else {
4571                 match.root = root;
4572         }
4573
4574         node = rb_search(&rec->backref_tree, &match.node.node,
4575                          (rb_compare_keys)compare_extent_backref, NULL);
4576         if (node)
4577                 back = to_data_backref(rb_node_to_extent_backref(node));
4578
4579         return back;
4580 }
4581
4582 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4583                                                 u64 parent, u64 root,
4584                                                 u64 owner, u64 offset,
4585                                                 u64 max_size)
4586 {
4587         struct data_backref *ref = malloc(sizeof(*ref));
4588
4589         if (!ref)
4590                 return NULL;
4591         memset(&ref->node, 0, sizeof(ref->node));
4592         ref->node.is_data = 1;
4593
4594         if (parent > 0) {
4595                 ref->parent = parent;
4596                 ref->owner = 0;
4597                 ref->offset = 0;
4598                 ref->node.full_backref = 1;
4599         } else {
4600                 ref->root = root;
4601                 ref->owner = owner;
4602                 ref->offset = offset;
4603                 ref->node.full_backref = 0;
4604         }
4605         ref->bytes = max_size;
4606         ref->found_ref = 0;
4607         ref->num_refs = 0;
4608         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4609         if (max_size > rec->max_size)
4610                 rec->max_size = max_size;
4611         return ref;
4612 }
4613
4614 /* Check if the type of extent matches with its chunk */
4615 static void check_extent_type(struct extent_record *rec)
4616 {
4617         struct btrfs_block_group_cache *bg_cache;
4618
4619         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4620         if (!bg_cache)
4621                 return;
4622
4623         /* data extent, check chunk directly*/
4624         if (!rec->metadata) {
4625                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4626                         rec->wrong_chunk_type = 1;
4627                 return;
4628         }
4629
4630         /* metadata extent, check the obvious case first */
4631         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4632                                  BTRFS_BLOCK_GROUP_METADATA))) {
4633                 rec->wrong_chunk_type = 1;
4634                 return;
4635         }
4636
4637         /*
4638          * Check SYSTEM extent, as it's also marked as metadata, we can only
4639          * make sure it's a SYSTEM extent by its backref
4640          */
4641         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4642                 struct extent_backref *node;
4643                 struct tree_backref *tback;
4644                 u64 bg_type;
4645
4646                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4647                 if (node->is_data) {
4648                         /* tree block shouldn't have data backref */
4649                         rec->wrong_chunk_type = 1;
4650                         return;
4651                 }
4652                 tback = container_of(node, struct tree_backref, node);
4653
4654                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4655                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4656                 else
4657                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4658                 if (!(bg_cache->flags & bg_type))
4659                         rec->wrong_chunk_type = 1;
4660         }
4661 }
4662
4663 /*
4664  * Allocate a new extent record, fill default values from @tmpl and insert int
4665  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4666  * the cache, otherwise it fails.
4667  */
4668 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4669                 struct extent_record *tmpl)
4670 {
4671         struct extent_record *rec;
4672         int ret = 0;
4673
4674         rec = malloc(sizeof(*rec));
4675         if (!rec)
4676                 return -ENOMEM;
4677         rec->start = tmpl->start;
4678         rec->max_size = tmpl->max_size;
4679         rec->nr = max(tmpl->nr, tmpl->max_size);
4680         rec->found_rec = tmpl->found_rec;
4681         rec->content_checked = tmpl->content_checked;
4682         rec->owner_ref_checked = tmpl->owner_ref_checked;
4683         rec->num_duplicates = 0;
4684         rec->metadata = tmpl->metadata;
4685         rec->flag_block_full_backref = FLAG_UNSET;
4686         rec->bad_full_backref = 0;
4687         rec->crossing_stripes = 0;
4688         rec->wrong_chunk_type = 0;
4689         rec->is_root = tmpl->is_root;
4690         rec->refs = tmpl->refs;
4691         rec->extent_item_refs = tmpl->extent_item_refs;
4692         rec->parent_generation = tmpl->parent_generation;
4693         INIT_LIST_HEAD(&rec->backrefs);
4694         INIT_LIST_HEAD(&rec->dups);
4695         INIT_LIST_HEAD(&rec->list);
4696         rec->backref_tree = RB_ROOT;
4697         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4698         rec->cache.start = tmpl->start;
4699         rec->cache.size = tmpl->nr;
4700         ret = insert_cache_extent(extent_cache, &rec->cache);
4701         BUG_ON(ret);
4702         bytes_used += rec->nr;
4703
4704         if (tmpl->metadata)
4705                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4706                                 global_info->tree_root->nodesize);
4707         check_extent_type(rec);
4708         return ret;
4709 }
4710
4711 /*
4712  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4713  * some are hints:
4714  * - refs              - if found, increase refs
4715  * - is_root           - if found, set
4716  * - content_checked   - if found, set
4717  * - owner_ref_checked - if found, set
4718  *
4719  * If not found, create a new one, initialize and insert.
4720  */
4721 static int add_extent_rec(struct cache_tree *extent_cache,
4722                 struct extent_record *tmpl)
4723 {
4724         struct extent_record *rec;
4725         struct cache_extent *cache;
4726         int ret = 0;
4727         int dup = 0;
4728
4729         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4730         if (cache) {
4731                 rec = container_of(cache, struct extent_record, cache);
4732                 if (tmpl->refs)
4733                         rec->refs++;
4734                 if (rec->nr == 1)
4735                         rec->nr = max(tmpl->nr, tmpl->max_size);
4736
4737                 /*
4738                  * We need to make sure to reset nr to whatever the extent
4739                  * record says was the real size, this way we can compare it to
4740                  * the backrefs.
4741                  */
4742                 if (tmpl->found_rec) {
4743                         if (tmpl->start != rec->start || rec->found_rec) {
4744                                 struct extent_record *tmp;
4745
4746                                 dup = 1;
4747                                 if (list_empty(&rec->list))
4748                                         list_add_tail(&rec->list,
4749                                                       &duplicate_extents);
4750
4751                                 /*
4752                                  * We have to do this song and dance in case we
4753                                  * find an extent record that falls inside of
4754                                  * our current extent record but does not have
4755                                  * the same objectid.
4756                                  */
4757                                 tmp = malloc(sizeof(*tmp));
4758                                 if (!tmp)
4759                                         return -ENOMEM;
4760                                 tmp->start = tmpl->start;
4761                                 tmp->max_size = tmpl->max_size;
4762                                 tmp->nr = tmpl->nr;
4763                                 tmp->found_rec = 1;
4764                                 tmp->metadata = tmpl->metadata;
4765                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4766                                 INIT_LIST_HEAD(&tmp->list);
4767                                 list_add_tail(&tmp->list, &rec->dups);
4768                                 rec->num_duplicates++;
4769                         } else {
4770                                 rec->nr = tmpl->nr;
4771                                 rec->found_rec = 1;
4772                         }
4773                 }
4774
4775                 if (tmpl->extent_item_refs && !dup) {
4776                         if (rec->extent_item_refs) {
4777                                 fprintf(stderr, "block %llu rec "
4778                                         "extent_item_refs %llu, passed %llu\n",
4779                                         (unsigned long long)tmpl->start,
4780                                         (unsigned long long)
4781                                                         rec->extent_item_refs,
4782                                         (unsigned long long)tmpl->extent_item_refs);
4783                         }
4784                         rec->extent_item_refs = tmpl->extent_item_refs;
4785                 }
4786                 if (tmpl->is_root)
4787                         rec->is_root = 1;
4788                 if (tmpl->content_checked)
4789                         rec->content_checked = 1;
4790                 if (tmpl->owner_ref_checked)
4791                         rec->owner_ref_checked = 1;
4792                 memcpy(&rec->parent_key, &tmpl->parent_key,
4793                                 sizeof(tmpl->parent_key));
4794                 if (tmpl->parent_generation)
4795                         rec->parent_generation = tmpl->parent_generation;
4796                 if (rec->max_size < tmpl->max_size)
4797                         rec->max_size = tmpl->max_size;
4798
4799                 /*
4800                  * A metadata extent can't cross stripe_len boundary, otherwise
4801                  * kernel scrub won't be able to handle it.
4802                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4803                  * it.
4804                  */
4805                 if (tmpl->metadata)
4806                         rec->crossing_stripes = check_crossing_stripes(
4807                                 rec->start, global_info->tree_root->nodesize);
4808                 check_extent_type(rec);
4809                 maybe_free_extent_rec(extent_cache, rec);
4810                 return ret;
4811         }
4812
4813         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4814
4815         return ret;
4816 }
4817
4818 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4819                             u64 parent, u64 root, int found_ref)
4820 {
4821         struct extent_record *rec;
4822         struct tree_backref *back;
4823         struct cache_extent *cache;
4824
4825         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4826         if (!cache) {
4827                 struct extent_record tmpl;
4828
4829                 memset(&tmpl, 0, sizeof(tmpl));
4830                 tmpl.start = bytenr;
4831                 tmpl.nr = 1;
4832                 tmpl.metadata = 1;
4833
4834                 add_extent_rec_nolookup(extent_cache, &tmpl);
4835
4836                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4837                 if (!cache)
4838                         abort();
4839         }
4840
4841         rec = container_of(cache, struct extent_record, cache);
4842         if (rec->start != bytenr) {
4843                 abort();
4844         }
4845
4846         back = find_tree_backref(rec, parent, root);
4847         if (!back) {
4848                 back = alloc_tree_backref(rec, parent, root);
4849                 BUG_ON(!back);
4850         }
4851
4852         if (found_ref) {
4853                 if (back->node.found_ref) {
4854                         fprintf(stderr, "Extent back ref already exists "
4855                                 "for %llu parent %llu root %llu \n",
4856                                 (unsigned long long)bytenr,
4857                                 (unsigned long long)parent,
4858                                 (unsigned long long)root);
4859                 }
4860                 back->node.found_ref = 1;
4861         } else {
4862                 if (back->node.found_extent_tree) {
4863                         fprintf(stderr, "Extent back ref already exists "
4864                                 "for %llu parent %llu root %llu \n",
4865                                 (unsigned long long)bytenr,
4866                                 (unsigned long long)parent,
4867                                 (unsigned long long)root);
4868                 }
4869                 back->node.found_extent_tree = 1;
4870         }
4871         check_extent_type(rec);
4872         maybe_free_extent_rec(extent_cache, rec);
4873         return 0;
4874 }
4875
4876 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4877                             u64 parent, u64 root, u64 owner, u64 offset,
4878                             u32 num_refs, int found_ref, u64 max_size)
4879 {
4880         struct extent_record *rec;
4881         struct data_backref *back;
4882         struct cache_extent *cache;
4883
4884         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4885         if (!cache) {
4886                 struct extent_record tmpl;
4887
4888                 memset(&tmpl, 0, sizeof(tmpl));
4889                 tmpl.start = bytenr;
4890                 tmpl.nr = 1;
4891                 tmpl.max_size = max_size;
4892
4893                 add_extent_rec_nolookup(extent_cache, &tmpl);
4894
4895                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4896                 if (!cache)
4897                         abort();
4898         }
4899
4900         rec = container_of(cache, struct extent_record, cache);
4901         if (rec->max_size < max_size)
4902                 rec->max_size = max_size;
4903
4904         /*
4905          * If found_ref is set then max_size is the real size and must match the
4906          * existing refs.  So if we have already found a ref then we need to
4907          * make sure that this ref matches the existing one, otherwise we need
4908          * to add a new backref so we can notice that the backrefs don't match
4909          * and we need to figure out who is telling the truth.  This is to
4910          * account for that awful fsync bug I introduced where we'd end up with
4911          * a btrfs_file_extent_item that would have its length include multiple
4912          * prealloc extents or point inside of a prealloc extent.
4913          */
4914         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4915                                  bytenr, max_size);
4916         if (!back) {
4917                 back = alloc_data_backref(rec, parent, root, owner, offset,
4918                                           max_size);
4919                 BUG_ON(!back);
4920         }
4921
4922         if (found_ref) {
4923                 BUG_ON(num_refs != 1);
4924                 if (back->node.found_ref)
4925                         BUG_ON(back->bytes != max_size);
4926                 back->node.found_ref = 1;
4927                 back->found_ref += 1;
4928                 back->bytes = max_size;
4929                 back->disk_bytenr = bytenr;
4930                 rec->refs += 1;
4931                 rec->content_checked = 1;
4932                 rec->owner_ref_checked = 1;
4933         } else {
4934                 if (back->node.found_extent_tree) {
4935                         fprintf(stderr, "Extent back ref already exists "
4936                                 "for %llu parent %llu root %llu "
4937                                 "owner %llu offset %llu num_refs %lu\n",
4938                                 (unsigned long long)bytenr,
4939                                 (unsigned long long)parent,
4940                                 (unsigned long long)root,
4941                                 (unsigned long long)owner,
4942                                 (unsigned long long)offset,
4943                                 (unsigned long)num_refs);
4944                 }
4945                 back->num_refs = num_refs;
4946                 back->node.found_extent_tree = 1;
4947         }
4948         maybe_free_extent_rec(extent_cache, rec);
4949         return 0;
4950 }
4951
4952 static int add_pending(struct cache_tree *pending,
4953                        struct cache_tree *seen, u64 bytenr, u32 size)
4954 {
4955         int ret;
4956         ret = add_cache_extent(seen, bytenr, size);
4957         if (ret)
4958                 return ret;
4959         add_cache_extent(pending, bytenr, size);
4960         return 0;
4961 }
4962
4963 static int pick_next_pending(struct cache_tree *pending,
4964                         struct cache_tree *reada,
4965                         struct cache_tree *nodes,
4966                         u64 last, struct block_info *bits, int bits_nr,
4967                         int *reada_bits)
4968 {
4969         unsigned long node_start = last;
4970         struct cache_extent *cache;
4971         int ret;
4972
4973         cache = search_cache_extent(reada, 0);
4974         if (cache) {
4975                 bits[0].start = cache->start;
4976                 bits[0].size = cache->size;
4977                 *reada_bits = 1;
4978                 return 1;
4979         }
4980         *reada_bits = 0;
4981         if (node_start > 32768)
4982                 node_start -= 32768;
4983
4984         cache = search_cache_extent(nodes, node_start);
4985         if (!cache)
4986                 cache = search_cache_extent(nodes, 0);
4987
4988         if (!cache) {
4989                  cache = search_cache_extent(pending, 0);
4990                  if (!cache)
4991                          return 0;
4992                  ret = 0;
4993                  do {
4994                          bits[ret].start = cache->start;
4995                          bits[ret].size = cache->size;
4996                          cache = next_cache_extent(cache);
4997                          ret++;
4998                  } while (cache && ret < bits_nr);
4999                  return ret;
5000         }
5001
5002         ret = 0;
5003         do {
5004                 bits[ret].start = cache->start;
5005                 bits[ret].size = cache->size;
5006                 cache = next_cache_extent(cache);
5007                 ret++;
5008         } while (cache && ret < bits_nr);
5009
5010         if (bits_nr - ret > 8) {
5011                 u64 lookup = bits[0].start + bits[0].size;
5012                 struct cache_extent *next;
5013                 next = search_cache_extent(pending, lookup);
5014                 while(next) {
5015                         if (next->start - lookup > 32768)
5016                                 break;
5017                         bits[ret].start = next->start;
5018                         bits[ret].size = next->size;
5019                         lookup = next->start + next->size;
5020                         ret++;
5021                         if (ret == bits_nr)
5022                                 break;
5023                         next = next_cache_extent(next);
5024                         if (!next)
5025                                 break;
5026                 }
5027         }
5028         return ret;
5029 }
5030
5031 static void free_chunk_record(struct cache_extent *cache)
5032 {
5033         struct chunk_record *rec;
5034
5035         rec = container_of(cache, struct chunk_record, cache);
5036         list_del_init(&rec->list);
5037         list_del_init(&rec->dextents);
5038         free(rec);
5039 }
5040
5041 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5042 {
5043         cache_tree_free_extents(chunk_cache, free_chunk_record);
5044 }
5045
5046 static void free_device_record(struct rb_node *node)
5047 {
5048         struct device_record *rec;
5049
5050         rec = container_of(node, struct device_record, node);
5051         free(rec);
5052 }
5053
5054 FREE_RB_BASED_TREE(device_cache, free_device_record);
5055
5056 int insert_block_group_record(struct block_group_tree *tree,
5057                               struct block_group_record *bg_rec)
5058 {
5059         int ret;
5060
5061         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5062         if (ret)
5063                 return ret;
5064
5065         list_add_tail(&bg_rec->list, &tree->block_groups);
5066         return 0;
5067 }
5068
5069 static void free_block_group_record(struct cache_extent *cache)
5070 {
5071         struct block_group_record *rec;
5072
5073         rec = container_of(cache, struct block_group_record, cache);
5074         list_del_init(&rec->list);
5075         free(rec);
5076 }
5077
5078 void free_block_group_tree(struct block_group_tree *tree)
5079 {
5080         cache_tree_free_extents(&tree->tree, free_block_group_record);
5081 }
5082
5083 int insert_device_extent_record(struct device_extent_tree *tree,
5084                                 struct device_extent_record *de_rec)
5085 {
5086         int ret;
5087
5088         /*
5089          * Device extent is a bit different from the other extents, because
5090          * the extents which belong to the different devices may have the
5091          * same start and size, so we need use the special extent cache
5092          * search/insert functions.
5093          */
5094         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5095         if (ret)
5096                 return ret;
5097
5098         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5099         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5100         return 0;
5101 }
5102
5103 static void free_device_extent_record(struct cache_extent *cache)
5104 {
5105         struct device_extent_record *rec;
5106
5107         rec = container_of(cache, struct device_extent_record, cache);
5108         if (!list_empty(&rec->chunk_list))
5109                 list_del_init(&rec->chunk_list);
5110         if (!list_empty(&rec->device_list))
5111                 list_del_init(&rec->device_list);
5112         free(rec);
5113 }
5114
5115 void free_device_extent_tree(struct device_extent_tree *tree)
5116 {
5117         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5118 }
5119
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf: references whose objectid
 * is below BTRFS_FIRST_FREE_OBJECTID are recorded as tree backrefs, all
 * others as data backrefs carrying the item's ref count.  The key's
 * objectid is used as the extent bytenr and its offset as the parent.
 * Always returns 0.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID) {
		add_data_backref(extent_cache, key.objectid, key.offset, 0,
				 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
		return 0;
	}

	add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
	return 0;
}
#endif
5138
5139 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5140                                             struct btrfs_key *key,
5141                                             int slot)
5142 {
5143         struct btrfs_chunk *ptr;
5144         struct chunk_record *rec;
5145         int num_stripes, i;
5146
5147         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5148         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5149
5150         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5151         if (!rec) {
5152                 fprintf(stderr, "memory allocation failed\n");
5153                 exit(-1);
5154         }
5155
5156         INIT_LIST_HEAD(&rec->list);
5157         INIT_LIST_HEAD(&rec->dextents);
5158         rec->bg_rec = NULL;
5159
5160         rec->cache.start = key->offset;
5161         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5162
5163         rec->generation = btrfs_header_generation(leaf);
5164
5165         rec->objectid = key->objectid;
5166         rec->type = key->type;
5167         rec->offset = key->offset;
5168
5169         rec->length = rec->cache.size;
5170         rec->owner = btrfs_chunk_owner(leaf, ptr);
5171         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5172         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5173         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5174         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5175         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5176         rec->num_stripes = num_stripes;
5177         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5178
5179         for (i = 0; i < rec->num_stripes; ++i) {
5180                 rec->stripes[i].devid =
5181                         btrfs_stripe_devid_nr(leaf, ptr, i);
5182                 rec->stripes[i].offset =
5183                         btrfs_stripe_offset_nr(leaf, ptr, i);
5184                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5185                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5186                                 BTRFS_UUID_SIZE);
5187         }
5188
5189         return rec;
5190 }
5191
5192 static int process_chunk_item(struct cache_tree *chunk_cache,
5193                               struct btrfs_key *key, struct extent_buffer *eb,
5194                               int slot)
5195 {
5196         struct chunk_record *rec;
5197         int ret = 0;
5198
5199         rec = btrfs_new_chunk_record(eb, key, slot);
5200         ret = insert_cache_extent(chunk_cache, &rec->cache);
5201         if (ret) {
5202                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5203                         rec->offset, rec->length);
5204                 free(rec);
5205         }
5206
5207         return ret;
5208 }
5209
5210 static int process_device_item(struct rb_root *dev_cache,
5211                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5212 {
5213         struct btrfs_dev_item *ptr;
5214         struct device_record *rec;
5215         int ret = 0;
5216
5217         ptr = btrfs_item_ptr(eb,
5218                 slot, struct btrfs_dev_item);
5219
5220         rec = malloc(sizeof(*rec));
5221         if (!rec) {
5222                 fprintf(stderr, "memory allocation failed\n");
5223                 return -ENOMEM;
5224         }
5225
5226         rec->devid = key->offset;
5227         rec->generation = btrfs_header_generation(eb);
5228
5229         rec->objectid = key->objectid;
5230         rec->type = key->type;
5231         rec->offset = key->offset;
5232
5233         rec->devid = btrfs_device_id(eb, ptr);
5234         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5235         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5236
5237         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5238         if (ret) {
5239                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5240                 free(rec);
5241         }
5242
5243         return ret;
5244 }
5245
5246 struct block_group_record *
5247 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5248                              int slot)
5249 {
5250         struct btrfs_block_group_item *ptr;
5251         struct block_group_record *rec;
5252
5253         rec = calloc(1, sizeof(*rec));
5254         if (!rec) {
5255                 fprintf(stderr, "memory allocation failed\n");
5256                 exit(-1);
5257         }
5258
5259         rec->cache.start = key->objectid;
5260         rec->cache.size = key->offset;
5261
5262         rec->generation = btrfs_header_generation(leaf);
5263
5264         rec->objectid = key->objectid;
5265         rec->type = key->type;
5266         rec->offset = key->offset;
5267
5268         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5269         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5270
5271         INIT_LIST_HEAD(&rec->list);
5272
5273         return rec;
5274 }
5275
5276 static int process_block_group_item(struct block_group_tree *block_group_cache,
5277                                     struct btrfs_key *key,
5278                                     struct extent_buffer *eb, int slot)
5279 {
5280         struct block_group_record *rec;
5281         int ret = 0;
5282
5283         rec = btrfs_new_block_group_record(eb, key, slot);
5284         ret = insert_block_group_record(block_group_cache, rec);
5285         if (ret) {
5286                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5287                         rec->objectid, rec->offset);
5288                 free(rec);
5289         }
5290
5291         return ret;
5292 }
5293
5294 struct device_extent_record *
5295 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5296                                struct btrfs_key *key, int slot)
5297 {
5298         struct device_extent_record *rec;
5299         struct btrfs_dev_extent *ptr;
5300
5301         rec = calloc(1, sizeof(*rec));
5302         if (!rec) {
5303                 fprintf(stderr, "memory allocation failed\n");
5304                 exit(-1);
5305         }
5306
5307         rec->cache.objectid = key->objectid;
5308         rec->cache.start = key->offset;
5309
5310         rec->generation = btrfs_header_generation(leaf);
5311
5312         rec->objectid = key->objectid;
5313         rec->type = key->type;
5314         rec->offset = key->offset;
5315
5316         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5317         rec->chunk_objecteid =
5318                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5319         rec->chunk_offset =
5320                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5321         rec->length = btrfs_dev_extent_length(leaf, ptr);
5322         rec->cache.size = rec->length;
5323
5324         INIT_LIST_HEAD(&rec->chunk_list);
5325         INIT_LIST_HEAD(&rec->device_list);
5326
5327         return rec;
5328 }
5329
5330 static int
5331 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5332                            struct btrfs_key *key, struct extent_buffer *eb,
5333                            int slot)
5334 {
5335         struct device_extent_record *rec;
5336         int ret;
5337
5338         rec = btrfs_new_device_extent_record(eb, key, slot);
5339         ret = insert_device_extent_record(dev_extent_cache, rec);
5340         if (ret) {
5341                 fprintf(stderr,
5342                         "Device extent[%llu, %llu, %llu] existed.\n",
5343                         rec->objectid, rec->offset, rec->length);
5344                 free(rec);
5345         }
5346
5347         return ret;
5348 }
5349
5350 static int process_extent_item(struct btrfs_root *root,
5351                                struct cache_tree *extent_cache,
5352                                struct extent_buffer *eb, int slot)
5353 {
5354         struct btrfs_extent_item *ei;
5355         struct btrfs_extent_inline_ref *iref;
5356         struct btrfs_extent_data_ref *dref;
5357         struct btrfs_shared_data_ref *sref;
5358         struct btrfs_key key;
5359         struct extent_record tmpl;
5360         unsigned long end;
5361         unsigned long ptr;
5362         int type;
5363         u32 item_size = btrfs_item_size_nr(eb, slot);
5364         u64 refs = 0;
5365         u64 offset;
5366         u64 num_bytes;
5367         int metadata = 0;
5368
5369         btrfs_item_key_to_cpu(eb, &key, slot);
5370
5371         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5372                 metadata = 1;
5373                 num_bytes = root->nodesize;
5374         } else {
5375                 num_bytes = key.offset;
5376         }
5377
5378         if (item_size < sizeof(*ei)) {
5379 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5380                 struct btrfs_extent_item_v0 *ei0;
5381                 BUG_ON(item_size != sizeof(*ei0));
5382                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5383                 refs = btrfs_extent_refs_v0(eb, ei0);
5384 #else
5385                 BUG();
5386 #endif
5387                 memset(&tmpl, 0, sizeof(tmpl));
5388                 tmpl.start = key.objectid;
5389                 tmpl.nr = num_bytes;
5390                 tmpl.extent_item_refs = refs;
5391                 tmpl.metadata = metadata;
5392                 tmpl.found_rec = 1;
5393                 tmpl.max_size = num_bytes;
5394
5395                 return add_extent_rec(extent_cache, &tmpl);
5396         }
5397
5398         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5399         refs = btrfs_extent_refs(eb, ei);
5400         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5401                 metadata = 1;
5402         else
5403                 metadata = 0;
5404
5405         memset(&tmpl, 0, sizeof(tmpl));
5406         tmpl.start = key.objectid;
5407         tmpl.nr = num_bytes;
5408         tmpl.extent_item_refs = refs;
5409         tmpl.metadata = metadata;
5410         tmpl.found_rec = 1;
5411         tmpl.max_size = num_bytes;
5412         add_extent_rec(extent_cache, &tmpl);
5413
5414         ptr = (unsigned long)(ei + 1);
5415         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5416             key.type == BTRFS_EXTENT_ITEM_KEY)
5417                 ptr += sizeof(struct btrfs_tree_block_info);
5418
5419         end = (unsigned long)ei + item_size;
5420         while (ptr < end) {
5421                 iref = (struct btrfs_extent_inline_ref *)ptr;
5422                 type = btrfs_extent_inline_ref_type(eb, iref);
5423                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5424                 switch (type) {
5425                 case BTRFS_TREE_BLOCK_REF_KEY:
5426                         add_tree_backref(extent_cache, key.objectid,
5427                                          0, offset, 0);
5428                         break;
5429                 case BTRFS_SHARED_BLOCK_REF_KEY:
5430                         add_tree_backref(extent_cache, key.objectid,
5431                                          offset, 0, 0);
5432                         break;
5433                 case BTRFS_EXTENT_DATA_REF_KEY:
5434                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5435                         add_data_backref(extent_cache, key.objectid, 0,
5436                                         btrfs_extent_data_ref_root(eb, dref),
5437                                         btrfs_extent_data_ref_objectid(eb,
5438                                                                        dref),
5439                                         btrfs_extent_data_ref_offset(eb, dref),
5440                                         btrfs_extent_data_ref_count(eb, dref),
5441                                         0, num_bytes);
5442                         break;
5443                 case BTRFS_SHARED_DATA_REF_KEY:
5444                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5445                         add_data_backref(extent_cache, key.objectid, offset,
5446                                         0, 0, 0,
5447                                         btrfs_shared_data_ref_count(eb, sref),
5448                                         0, num_bytes);
5449                         break;
5450                 default:
5451                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5452                                 key.objectid, key.type, num_bytes);
5453                         goto out;
5454                 }
5455                 ptr += btrfs_extent_inline_ref_size(type);
5456         }
5457         WARN_ON(ptr > end);
5458 out:
5459         return 0;
5460 }
5461
5462 static int check_cache_range(struct btrfs_root *root,
5463                              struct btrfs_block_group_cache *cache,
5464                              u64 offset, u64 bytes)
5465 {
5466         struct btrfs_free_space *entry;
5467         u64 *logical;
5468         u64 bytenr;
5469         int stripe_len;
5470         int i, nr, ret;
5471
5472         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5473                 bytenr = btrfs_sb_offset(i);
5474                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5475                                        cache->key.objectid, bytenr, 0,
5476                                        &logical, &nr, &stripe_len);
5477                 if (ret)
5478                         return ret;
5479
5480                 while (nr--) {
5481                         if (logical[nr] + stripe_len <= offset)
5482                                 continue;
5483                         if (offset + bytes <= logical[nr])
5484                                 continue;
5485                         if (logical[nr] == offset) {
5486                                 if (stripe_len >= bytes) {
5487                                         kfree(logical);
5488                                         return 0;
5489                                 }
5490                                 bytes -= stripe_len;
5491                                 offset += stripe_len;
5492                         } else if (logical[nr] < offset) {
5493                                 if (logical[nr] + stripe_len >=
5494                                     offset + bytes) {
5495                                         kfree(logical);
5496                                         return 0;
5497                                 }
5498                                 bytes = (offset + bytes) -
5499                                         (logical[nr] + stripe_len);
5500                                 offset = logical[nr] + stripe_len;
5501                         } else {
5502                                 /*
5503                                  * Could be tricky, the super may land in the
5504                                  * middle of the area we're checking.  First
5505                                  * check the easiest case, it's at the end.
5506                                  */
5507                                 if (logical[nr] + stripe_len >=
5508                                     bytes + offset) {
5509                                         bytes = logical[nr] - offset;
5510                                         continue;
5511                                 }
5512
5513                                 /* Check the left side */
5514                                 ret = check_cache_range(root, cache,
5515                                                         offset,
5516                                                         logical[nr] - offset);
5517                                 if (ret) {
5518                                         kfree(logical);
5519                                         return ret;
5520                                 }
5521
5522                                 /* Now we continue with the right side */
5523                                 bytes = (offset + bytes) -
5524                                         (logical[nr] + stripe_len);
5525                                 offset = logical[nr] + stripe_len;
5526                         }
5527                 }
5528
5529                 kfree(logical);
5530         }
5531
5532         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5533         if (!entry) {
5534                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5535                         offset, offset+bytes);
5536                 return -EINVAL;
5537         }
5538
5539         if (entry->offset != offset) {
5540                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5541                         entry->offset);
5542                 return -EINVAL;
5543         }
5544
5545         if (entry->bytes != bytes) {
5546                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5547                         bytes, entry->bytes, offset);
5548                 return -EINVAL;
5549         }
5550
5551         unlink_free_space(cache->free_space_ctl, entry);
5552         free(entry);
5553         return 0;
5554 }
5555
5556 static int verify_space_cache(struct btrfs_root *root,
5557                               struct btrfs_block_group_cache *cache)
5558 {
5559         struct btrfs_path *path;
5560         struct extent_buffer *leaf;
5561         struct btrfs_key key;
5562         u64 last;
5563         int ret = 0;
5564
5565         path = btrfs_alloc_path();
5566         if (!path)
5567                 return -ENOMEM;
5568
5569         root = root->fs_info->extent_root;
5570
5571         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5572
5573         key.objectid = last;
5574         key.offset = 0;
5575         key.type = BTRFS_EXTENT_ITEM_KEY;
5576
5577         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5578         if (ret < 0)
5579                 goto out;
5580         ret = 0;
5581         while (1) {
5582                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5583                         ret = btrfs_next_leaf(root, path);
5584                         if (ret < 0)
5585                                 goto out;
5586                         if (ret > 0) {
5587                                 ret = 0;
5588                                 break;
5589                         }
5590                 }
5591                 leaf = path->nodes[0];
5592                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5593                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5594                         break;
5595                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5596                     key.type != BTRFS_METADATA_ITEM_KEY) {
5597                         path->slots[0]++;
5598                         continue;
5599                 }
5600
5601                 if (last == key.objectid) {
5602                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5603                                 last = key.objectid + key.offset;
5604                         else
5605                                 last = key.objectid + root->nodesize;
5606                         path->slots[0]++;
5607                         continue;
5608                 }
5609
5610                 ret = check_cache_range(root, cache, last,
5611                                         key.objectid - last);
5612                 if (ret)
5613                         break;
5614                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5615                         last = key.objectid + key.offset;
5616                 else
5617                         last = key.objectid + root->nodesize;
5618                 path->slots[0]++;
5619         }
5620
5621         if (last < cache->key.objectid + cache->key.offset)
5622                 ret = check_cache_range(root, cache, last,
5623                                         cache->key.objectid +
5624                                         cache->key.offset - last);
5625
5626 out:
5627         btrfs_free_path(path);
5628
5629         if (!ret &&
5630             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5631                 fprintf(stderr, "There are still entries left in the space "
5632                         "cache\n");
5633                 ret = -EINVAL;
5634         }
5635
5636         return ret;
5637 }
5638
5639 static int check_space_cache(struct btrfs_root *root)
5640 {
5641         struct btrfs_block_group_cache *cache;
5642         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5643         int ret;
5644         int error = 0;
5645
5646         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5647             btrfs_super_generation(root->fs_info->super_copy) !=
5648             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5649                 printf("cache and super generation don't match, space cache "
5650                        "will be invalidated\n");
5651                 return 0;
5652         }
5653
5654         if (ctx.progress_enabled) {
5655                 ctx.tp = TASK_FREE_SPACE;
5656                 task_start(ctx.info);
5657         }
5658
5659         while (1) {
5660                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5661                 if (!cache)
5662                         break;
5663
5664                 start = cache->key.objectid + cache->key.offset;
5665                 if (!cache->free_space_ctl) {
5666                         if (btrfs_init_free_space_ctl(cache,
5667                                                       root->sectorsize)) {
5668                                 ret = -ENOMEM;
5669                                 break;
5670                         }
5671                 } else {
5672                         btrfs_remove_free_space_cache(cache);
5673                 }
5674
5675                 if (btrfs_fs_compat_ro(root->fs_info,
5676                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5677                         ret = exclude_super_stripes(root, cache);
5678                         if (ret) {
5679                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5680                                         strerror(-ret));
5681                                 error++;
5682                                 continue;
5683                         }
5684                         ret = load_free_space_tree(root->fs_info, cache);
5685                         free_excluded_extents(root, cache);
5686                         if (ret < 0) {
5687                                 fprintf(stderr, "could not load free space tree: %s\n",
5688                                         strerror(-ret));
5689                                 error++;
5690                                 continue;
5691                         }
5692                         error += ret;
5693                 } else {
5694                         ret = load_free_space_cache(root->fs_info, cache);
5695                         if (!ret)
5696                                 continue;
5697                 }
5698
5699                 ret = verify_space_cache(root, cache);
5700                 if (ret) {
5701                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5702                                 cache->key.objectid);
5703                         error++;
5704                 }
5705         }
5706
5707         task_stop(ctx.info);
5708
5709         return error ? -EINVAL : 0;
5710 }
5711
5712 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5713                         u64 num_bytes, unsigned long leaf_offset,
5714                         struct extent_buffer *eb) {
5715
5716         u64 offset = 0;
5717         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5718         char *data;
5719         unsigned long csum_offset;
5720         u32 csum;
5721         u32 csum_expected;
5722         u64 read_len;
5723         u64 data_checked = 0;
5724         u64 tmp;
5725         int ret = 0;
5726         int mirror;
5727         int num_copies;
5728
5729         if (num_bytes % root->sectorsize)
5730                 return -EINVAL;
5731
5732         data = malloc(num_bytes);
5733         if (!data)
5734                 return -ENOMEM;
5735
5736         while (offset < num_bytes) {
5737                 mirror = 0;
5738 again:
5739                 read_len = num_bytes - offset;
5740                 /* read as much space once a time */
5741                 ret = read_extent_data(root, data + offset,
5742                                 bytenr + offset, &read_len, mirror);
5743                 if (ret)
5744                         goto out;
5745                 data_checked = 0;
5746                 /* verify every 4k data's checksum */
5747                 while (data_checked < read_len) {
5748                         csum = ~(u32)0;
5749                         tmp = offset + data_checked;
5750
5751                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5752                                                csum, root->sectorsize);
5753                         btrfs_csum_final(csum, (char *)&csum);
5754
5755                         csum_offset = leaf_offset +
5756                                  tmp / root->sectorsize * csum_size;
5757                         read_extent_buffer(eb, (char *)&csum_expected,
5758                                            csum_offset, csum_size);
5759                         /* try another mirror */
5760                         if (csum != csum_expected) {
5761                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5762                                                 mirror, bytenr + tmp,
5763                                                 csum, csum_expected);
5764                                 num_copies = btrfs_num_copies(
5765                                                 &root->fs_info->mapping_tree,
5766                                                 bytenr, num_bytes);
5767                                 if (mirror < num_copies - 1) {
5768                                         mirror += 1;
5769                                         goto again;
5770                                 }
5771                         }
5772                         data_checked += root->sectorsize;
5773                 }
5774                 offset += read_len;
5775         }
5776 out:
5777         free(data);
5778         return ret;
5779 }
5780
5781 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5782                                u64 num_bytes)
5783 {
5784         struct btrfs_path *path;
5785         struct extent_buffer *leaf;
5786         struct btrfs_key key;
5787         int ret;
5788
5789         path = btrfs_alloc_path();
5790         if (!path) {
5791                 fprintf(stderr, "Error allocating path\n");
5792                 return -ENOMEM;
5793         }
5794
5795         key.objectid = bytenr;
5796         key.type = BTRFS_EXTENT_ITEM_KEY;
5797         key.offset = (u64)-1;
5798
5799 again:
5800         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5801                                 0, 0);
5802         if (ret < 0) {
5803                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5804                 btrfs_free_path(path);
5805                 return ret;
5806         } else if (ret) {
5807                 if (path->slots[0] > 0) {
5808                         path->slots[0]--;
5809                 } else {
5810                         ret = btrfs_prev_leaf(root, path);
5811                         if (ret < 0) {
5812                                 goto out;
5813                         } else if (ret > 0) {
5814                                 ret = 0;
5815                                 goto out;
5816                         }
5817                 }
5818         }
5819
5820         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5821
5822         /*
5823          * Block group items come before extent items if they have the same
5824          * bytenr, so walk back one more just in case.  Dear future traveller,
5825          * first congrats on mastering time travel.  Now if it's not too much
5826          * trouble could you go back to 2006 and tell Chris to make the
5827          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5828          * EXTENT_ITEM_KEY please?
5829          */
5830         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5831                 if (path->slots[0] > 0) {
5832                         path->slots[0]--;
5833                 } else {
5834                         ret = btrfs_prev_leaf(root, path);
5835                         if (ret < 0) {
5836                                 goto out;
5837                         } else if (ret > 0) {
5838                                 ret = 0;
5839                                 goto out;
5840                         }
5841                 }
5842                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5843         }
5844
5845         while (num_bytes) {
5846                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5847                         ret = btrfs_next_leaf(root, path);
5848                         if (ret < 0) {
5849                                 fprintf(stderr, "Error going to next leaf "
5850                                         "%d\n", ret);
5851                                 btrfs_free_path(path);
5852                                 return ret;
5853                         } else if (ret) {
5854                                 break;
5855                         }
5856                 }
5857                 leaf = path->nodes[0];
5858                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5859                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5860                         path->slots[0]++;
5861                         continue;
5862                 }
5863                 if (key.objectid + key.offset < bytenr) {
5864                         path->slots[0]++;
5865                         continue;
5866                 }
5867                 if (key.objectid > bytenr + num_bytes)
5868                         break;
5869
5870                 if (key.objectid == bytenr) {
5871                         if (key.offset >= num_bytes) {
5872                                 num_bytes = 0;
5873                                 break;
5874                         }
5875                         num_bytes -= key.offset;
5876                         bytenr += key.offset;
5877                 } else if (key.objectid < bytenr) {
5878                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5879                                 num_bytes = 0;
5880                                 break;
5881                         }
5882                         num_bytes = (bytenr + num_bytes) -
5883                                 (key.objectid + key.offset);
5884                         bytenr = key.objectid + key.offset;
5885                 } else {
5886                         if (key.objectid + key.offset < bytenr + num_bytes) {
5887                                 u64 new_start = key.objectid + key.offset;
5888                                 u64 new_bytes = bytenr + num_bytes - new_start;
5889
5890                                 /*
5891                                  * Weird case, the extent is in the middle of
5892                                  * our range, we'll have to search one side
5893                                  * and then the other.  Not sure if this happens
5894                                  * in real life, but no harm in coding it up
5895                                  * anyway just in case.
5896                                  */
5897                                 btrfs_release_path(path);
5898                                 ret = check_extent_exists(root, new_start,
5899                                                           new_bytes);
5900                                 if (ret) {
5901                                         fprintf(stderr, "Right section didn't "
5902                                                 "have a record\n");
5903                                         break;
5904                                 }
5905                                 num_bytes = key.objectid - bytenr;
5906                                 goto again;
5907                         }
5908                         num_bytes = key.objectid - bytenr;
5909                 }
5910                 path->slots[0]++;
5911         }
5912         ret = 0;
5913
5914 out:
5915         if (num_bytes && !ret) {
5916                 fprintf(stderr, "There are no extents for csum range "
5917                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5918                 ret = 1;
5919         }
5920
5921         btrfs_free_path(path);
5922         return ret;
5923 }
5924
5925 static int check_csums(struct btrfs_root *root)
5926 {
5927         struct btrfs_path *path;
5928         struct extent_buffer *leaf;
5929         struct btrfs_key key;
5930         u64 offset = 0, num_bytes = 0;
5931         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5932         int errors = 0;
5933         int ret;
5934         u64 data_len;
5935         unsigned long leaf_offset;
5936
5937         root = root->fs_info->csum_root;
5938         if (!extent_buffer_uptodate(root->node)) {
5939                 fprintf(stderr, "No valid csum tree found\n");
5940                 return -ENOENT;
5941         }
5942
5943         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5944         key.type = BTRFS_EXTENT_CSUM_KEY;
5945         key.offset = 0;
5946
5947         path = btrfs_alloc_path();
5948         if (!path)
5949                 return -ENOMEM;
5950
5951         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5952         if (ret < 0) {
5953                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5954                 btrfs_free_path(path);
5955                 return ret;
5956         }
5957
5958         if (ret > 0 && path->slots[0])
5959                 path->slots[0]--;
5960         ret = 0;
5961
5962         while (1) {
5963                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5964                         ret = btrfs_next_leaf(root, path);
5965                         if (ret < 0) {
5966                                 fprintf(stderr, "Error going to next leaf "
5967                                         "%d\n", ret);
5968                                 break;
5969                         }
5970                         if (ret)
5971                                 break;
5972                 }
5973                 leaf = path->nodes[0];
5974
5975                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5976                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5977                         path->slots[0]++;
5978                         continue;
5979                 }
5980
5981                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5982                               csum_size) * root->sectorsize;
5983                 if (!check_data_csum)
5984                         goto skip_csum_check;
5985                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5986                 ret = check_extent_csums(root, key.offset, data_len,
5987                                          leaf_offset, leaf);
5988                 if (ret)
5989                         break;
5990 skip_csum_check:
5991                 if (!num_bytes) {
5992                         offset = key.offset;
5993                 } else if (key.offset != offset + num_bytes) {
5994                         ret = check_extent_exists(root, offset, num_bytes);
5995                         if (ret) {
5996                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5997                                         "there is no extent record\n",
5998                                         offset, offset+num_bytes);
5999                                 errors++;
6000                         }
6001                         offset = key.offset;
6002                         num_bytes = 0;
6003                 }
6004                 num_bytes += data_len;
6005                 path->slots[0]++;
6006         }
6007
6008         btrfs_free_path(path);
6009         return errors;
6010 }
6011
6012 static int is_dropped_key(struct btrfs_key *key,
6013                           struct btrfs_key *drop_key) {
6014         if (key->objectid < drop_key->objectid)
6015                 return 1;
6016         else if (key->objectid == drop_key->objectid) {
6017                 if (key->type < drop_key->type)
6018                         return 1;
6019                 else if (key->type == drop_key->type) {
6020                         if (key->offset < drop_key->offset)
6021                                 return 1;
6022                 }
6023         }
6024         return 0;
6025 }
6026
6027 /*
6028  * Here are the rules for FULL_BACKREF.
6029  *
6030  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6031  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6032  *      FULL_BACKREF set.
6033  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6034  *    if it happened after the relocation occurred since we'll have dropped the
6035  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6036  *    have no real way to know for sure.
6037  *
6038  * We process the blocks one root at a time, and we start from the lowest root
6039  * objectid and go to the highest.  So we can just lookup the owner backref for
6040  * the record and if we don't find it then we know it doesn't exist and we have
6041  * a FULL BACKREF.
6042  *
6043  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6044  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6045  * be set or not and then we can check later once we've gathered all the refs.
6046  */
6047 static int calc_extent_flag(struct btrfs_root *root,
6048                            struct cache_tree *extent_cache,
6049                            struct extent_buffer *buf,
6050                            struct root_item_record *ri,
6051                            u64 *flags)
6052 {
6053         struct extent_record *rec;
6054         struct cache_extent *cache;
6055         struct tree_backref *tback;
6056         u64 owner = 0;
6057
6058         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6059         /* we have added this extent before */
6060         BUG_ON(!cache);
6061         rec = container_of(cache, struct extent_record, cache);
6062
6063         /*
6064          * Except file/reloc tree, we can not have
6065          * FULL BACKREF MODE
6066          */
6067         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6068                 goto normal;
6069         /*
6070          * root node
6071          */
6072         if (buf->start == ri->bytenr)
6073                 goto normal;
6074
6075         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6076                 goto full_backref;
6077
6078         owner = btrfs_header_owner(buf);
6079         if (owner == ri->objectid)
6080                 goto normal;
6081
6082         tback = find_tree_backref(rec, 0, owner);
6083         if (!tback)
6084                 goto full_backref;
6085 normal:
6086         *flags = 0;
6087         if (rec->flag_block_full_backref != FLAG_UNSET &&
6088             rec->flag_block_full_backref != 0)
6089                 rec->bad_full_backref = 1;
6090         return 0;
6091 full_backref:
6092         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6093         if (rec->flag_block_full_backref != FLAG_UNSET &&
6094             rec->flag_block_full_backref != 1)
6095                 rec->bad_full_backref = 1;
6096         return 0;
6097 }
6098
6099 static int run_next_block(struct btrfs_root *root,
6100                           struct block_info *bits,
6101                           int bits_nr,
6102                           u64 *last,
6103                           struct cache_tree *pending,
6104                           struct cache_tree *seen,
6105                           struct cache_tree *reada,
6106                           struct cache_tree *nodes,
6107                           struct cache_tree *extent_cache,
6108                           struct cache_tree *chunk_cache,
6109                           struct rb_root *dev_cache,
6110                           struct block_group_tree *block_group_cache,
6111                           struct device_extent_tree *dev_extent_cache,
6112                           struct root_item_record *ri)
6113 {
6114         struct extent_buffer *buf;
6115         struct extent_record *rec = NULL;
6116         u64 bytenr;
6117         u32 size;
6118         u64 parent;
6119         u64 owner;
6120         u64 flags;
6121         u64 ptr;
6122         u64 gen = 0;
6123         int ret = 0;
6124         int i;
6125         int nritems;
6126         struct btrfs_key key;
6127         struct cache_extent *cache;
6128         int reada_bits;
6129
6130         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6131                                     bits_nr, &reada_bits);
6132         if (nritems == 0)
6133                 return 1;
6134
6135         if (!reada_bits) {
6136                 for(i = 0; i < nritems; i++) {
6137                         ret = add_cache_extent(reada, bits[i].start,
6138                                                bits[i].size);
6139                         if (ret == -EEXIST)
6140                                 continue;
6141
6142                         /* fixme, get the parent transid */
6143                         readahead_tree_block(root, bits[i].start,
6144                                              bits[i].size, 0);
6145                 }
6146         }
6147         *last = bits[0].start;
6148         bytenr = bits[0].start;
6149         size = bits[0].size;
6150
6151         cache = lookup_cache_extent(pending, bytenr, size);
6152         if (cache) {
6153                 remove_cache_extent(pending, cache);
6154                 free(cache);
6155         }
6156         cache = lookup_cache_extent(reada, bytenr, size);
6157         if (cache) {
6158                 remove_cache_extent(reada, cache);
6159                 free(cache);
6160         }
6161         cache = lookup_cache_extent(nodes, bytenr, size);
6162         if (cache) {
6163                 remove_cache_extent(nodes, cache);
6164                 free(cache);
6165         }
6166         cache = lookup_cache_extent(extent_cache, bytenr, size);
6167         if (cache) {
6168                 rec = container_of(cache, struct extent_record, cache);
6169                 gen = rec->parent_generation;
6170         }
6171
6172         /* fixme, get the real parent transid */
6173         buf = read_tree_block(root, bytenr, size, gen);
6174         if (!extent_buffer_uptodate(buf)) {
6175                 record_bad_block_io(root->fs_info,
6176                                     extent_cache, bytenr, size);
6177                 goto out;
6178         }
6179
6180         nritems = btrfs_header_nritems(buf);
6181
6182         flags = 0;
6183         if (!init_extent_tree) {
6184                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6185                                        btrfs_header_level(buf), 1, NULL,
6186                                        &flags);
6187                 if (ret < 0) {
6188                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6189                         if (ret < 0) {
6190                                 fprintf(stderr, "Couldn't calc extent flags\n");
6191                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6192                         }
6193                 }
6194         } else {
6195                 flags = 0;
6196                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6197                 if (ret < 0) {
6198                         fprintf(stderr, "Couldn't calc extent flags\n");
6199                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6200                 }
6201         }
6202
6203         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6204                 if (ri != NULL &&
6205                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6206                     ri->objectid == btrfs_header_owner(buf)) {
6207                         /*
6208                          * Ok we got to this block from it's original owner and
6209                          * we have FULL_BACKREF set.  Relocation can leave
6210                          * converted blocks over so this is altogether possible,
6211                          * however it's not possible if the generation > the
6212                          * last snapshot, so check for this case.
6213                          */
6214                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6215                             btrfs_header_generation(buf) > ri->last_snapshot) {
6216                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6217                                 rec->bad_full_backref = 1;
6218                         }
6219                 }
6220         } else {
6221                 if (ri != NULL &&
6222                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6223                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6224                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6225                         rec->bad_full_backref = 1;
6226                 }
6227         }
6228
6229         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6230                 rec->flag_block_full_backref = 1;
6231                 parent = bytenr;
6232                 owner = 0;
6233         } else {
6234                 rec->flag_block_full_backref = 0;
6235                 parent = 0;
6236                 owner = btrfs_header_owner(buf);
6237         }
6238
6239         ret = check_block(root, extent_cache, buf, flags);
6240         if (ret)
6241                 goto out;
6242
6243         if (btrfs_is_leaf(buf)) {
6244                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6245                 for (i = 0; i < nritems; i++) {
6246                         struct btrfs_file_extent_item *fi;
6247                         btrfs_item_key_to_cpu(buf, &key, i);
6248                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6249                                 process_extent_item(root, extent_cache, buf,
6250                                                     i);
6251                                 continue;
6252                         }
6253                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6254                                 process_extent_item(root, extent_cache, buf,
6255                                                     i);
6256                                 continue;
6257                         }
6258                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6259                                 total_csum_bytes +=
6260                                         btrfs_item_size_nr(buf, i);
6261                                 continue;
6262                         }
6263                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6264                                 process_chunk_item(chunk_cache, &key, buf, i);
6265                                 continue;
6266                         }
6267                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6268                                 process_device_item(dev_cache, &key, buf, i);
6269                                 continue;
6270                         }
6271                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6272                                 process_block_group_item(block_group_cache,
6273                                         &key, buf, i);
6274                                 continue;
6275                         }
6276                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6277                                 process_device_extent_item(dev_extent_cache,
6278                                         &key, buf, i);
6279                                 continue;
6280
6281                         }
6282                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6283 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6284                                 process_extent_ref_v0(extent_cache, buf, i);
6285 #else
6286                                 BUG();
6287 #endif
6288                                 continue;
6289                         }
6290
6291                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6292                                 add_tree_backref(extent_cache, key.objectid, 0,
6293                                                  key.offset, 0);
6294                                 continue;
6295                         }
6296                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6297                                 add_tree_backref(extent_cache, key.objectid,
6298                                                  key.offset, 0, 0);
6299                                 continue;
6300                         }
6301                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6302                                 struct btrfs_extent_data_ref *ref;
6303                                 ref = btrfs_item_ptr(buf, i,
6304                                                 struct btrfs_extent_data_ref);
6305                                 add_data_backref(extent_cache,
6306                                         key.objectid, 0,
6307                                         btrfs_extent_data_ref_root(buf, ref),
6308                                         btrfs_extent_data_ref_objectid(buf,
6309                                                                        ref),
6310                                         btrfs_extent_data_ref_offset(buf, ref),
6311                                         btrfs_extent_data_ref_count(buf, ref),
6312                                         0, root->sectorsize);
6313                                 continue;
6314                         }
6315                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6316                                 struct btrfs_shared_data_ref *ref;
6317                                 ref = btrfs_item_ptr(buf, i,
6318                                                 struct btrfs_shared_data_ref);
6319                                 add_data_backref(extent_cache,
6320                                         key.objectid, key.offset, 0, 0, 0,
6321                                         btrfs_shared_data_ref_count(buf, ref),
6322                                         0, root->sectorsize);
6323                                 continue;
6324                         }
6325                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6326                                 struct bad_item *bad;
6327
6328                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6329                                         continue;
6330                                 if (!owner)
6331                                         continue;
6332                                 bad = malloc(sizeof(struct bad_item));
6333                                 if (!bad)
6334                                         continue;
6335                                 INIT_LIST_HEAD(&bad->list);
6336                                 memcpy(&bad->key, &key,
6337                                        sizeof(struct btrfs_key));
6338                                 bad->root_id = owner;
6339                                 list_add_tail(&bad->list, &delete_items);
6340                                 continue;
6341                         }
6342                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6343                                 continue;
6344                         fi = btrfs_item_ptr(buf, i,
6345                                             struct btrfs_file_extent_item);
6346                         if (btrfs_file_extent_type(buf, fi) ==
6347                             BTRFS_FILE_EXTENT_INLINE)
6348                                 continue;
6349                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6350                                 continue;
6351
6352                         data_bytes_allocated +=
6353                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6354                         if (data_bytes_allocated < root->sectorsize) {
6355                                 abort();
6356                         }
6357                         data_bytes_referenced +=
6358                                 btrfs_file_extent_num_bytes(buf, fi);
6359                         add_data_backref(extent_cache,
6360                                 btrfs_file_extent_disk_bytenr(buf, fi),
6361                                 parent, owner, key.objectid, key.offset -
6362                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6363                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6364                 }
6365         } else {
6366                 int level;
6367                 struct btrfs_key first_key;
6368
6369                 first_key.objectid = 0;
6370
6371                 if (nritems > 0)
6372                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6373                 level = btrfs_header_level(buf);
6374                 for (i = 0; i < nritems; i++) {
6375                         struct extent_record tmpl;
6376
6377                         ptr = btrfs_node_blockptr(buf, i);
6378                         size = root->nodesize;
6379                         btrfs_node_key_to_cpu(buf, &key, i);
6380                         if (ri != NULL) {
6381                                 if ((level == ri->drop_level)
6382                                     && is_dropped_key(&key, &ri->drop_key)) {
6383                                         continue;
6384                                 }
6385                         }
6386
6387                         memset(&tmpl, 0, sizeof(tmpl));
6388                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6389                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6390                         tmpl.start = ptr;
6391                         tmpl.nr = size;
6392                         tmpl.refs = 1;
6393                         tmpl.metadata = 1;
6394                         tmpl.max_size = size;
6395                         ret = add_extent_rec(extent_cache, &tmpl);
6396                         BUG_ON(ret);
6397
6398                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6399
6400                         if (level > 1) {
6401                                 add_pending(nodes, seen, ptr, size);
6402                         } else {
6403                                 add_pending(pending, seen, ptr, size);
6404                         }
6405                 }
6406                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6407                                       nritems) * sizeof(struct btrfs_key_ptr);
6408         }
6409         total_btree_bytes += buf->len;
6410         if (fs_root_objectid(btrfs_header_owner(buf)))
6411                 total_fs_tree_bytes += buf->len;
6412         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6413                 total_extent_tree_bytes += buf->len;
6414         if (!found_old_backref &&
6415             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6416             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6417             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6418                 found_old_backref = 1;
6419 out:
6420         free_extent_buffer(buf);
6421         return ret;
6422 }
6423
6424 static int add_root_to_pending(struct extent_buffer *buf,
6425                                struct cache_tree *extent_cache,
6426                                struct cache_tree *pending,
6427                                struct cache_tree *seen,
6428                                struct cache_tree *nodes,
6429                                u64 objectid)
6430 {
6431         struct extent_record tmpl;
6432
6433         if (btrfs_header_level(buf) > 0)
6434                 add_pending(nodes, seen, buf->start, buf->len);
6435         else
6436                 add_pending(pending, seen, buf->start, buf->len);
6437
6438         memset(&tmpl, 0, sizeof(tmpl));
6439         tmpl.start = buf->start;
6440         tmpl.nr = buf->len;
6441         tmpl.is_root = 1;
6442         tmpl.refs = 1;
6443         tmpl.metadata = 1;
6444         tmpl.max_size = buf->len;
6445         add_extent_rec(extent_cache, &tmpl);
6446
6447         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6448             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6449                 add_tree_backref(extent_cache, buf->start, buf->start,
6450                                  0, 1);
6451         else
6452                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6453         return 0;
6454 }
6455
6456 /* as we fix the tree, we might be deleting blocks that
6457  * we're tracking for repair.  This hook makes sure we
6458  * remove any backrefs for blocks as we are fixing them.
6459  */
6460 static int free_extent_hook(struct btrfs_trans_handle *trans,
6461                             struct btrfs_root *root,
6462                             u64 bytenr, u64 num_bytes, u64 parent,
6463                             u64 root_objectid, u64 owner, u64 offset,
6464                             int refs_to_drop)
6465 {
6466         struct extent_record *rec;
6467         struct cache_extent *cache;
6468         int is_data;
6469         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6470
6471         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6472         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6473         if (!cache)
6474                 return 0;
6475
6476         rec = container_of(cache, struct extent_record, cache);
6477         if (is_data) {
6478                 struct data_backref *back;
6479                 back = find_data_backref(rec, parent, root_objectid, owner,
6480                                          offset, 1, bytenr, num_bytes);
6481                 if (!back)
6482                         goto out;
6483                 if (back->node.found_ref) {
6484                         back->found_ref -= refs_to_drop;
6485                         if (rec->refs)
6486                                 rec->refs -= refs_to_drop;
6487                 }
6488                 if (back->node.found_extent_tree) {
6489                         back->num_refs -= refs_to_drop;
6490                         if (rec->extent_item_refs)
6491                                 rec->extent_item_refs -= refs_to_drop;
6492                 }
6493                 if (back->found_ref == 0)
6494                         back->node.found_ref = 0;
6495                 if (back->num_refs == 0)
6496                         back->node.found_extent_tree = 0;
6497
6498                 if (!back->node.found_extent_tree && back->node.found_ref) {
6499                         rb_erase(&back->node.node, &rec->backref_tree);
6500                         free(back);
6501                 }
6502         } else {
6503                 struct tree_backref *back;
6504                 back = find_tree_backref(rec, parent, root_objectid);
6505                 if (!back)
6506                         goto out;
6507                 if (back->node.found_ref) {
6508                         if (rec->refs)
6509                                 rec->refs--;
6510                         back->node.found_ref = 0;
6511                 }
6512                 if (back->node.found_extent_tree) {
6513                         if (rec->extent_item_refs)
6514                                 rec->extent_item_refs--;
6515                         back->node.found_extent_tree = 0;
6516                 }
6517                 if (!back->node.found_extent_tree && back->node.found_ref) {
6518                         rb_erase(&back->node.node, &rec->backref_tree);
6519                         free(back);
6520                 }
6521         }
6522         maybe_free_extent_rec(extent_cache, rec);
6523 out:
6524         return 0;
6525 }
6526
6527 static int delete_extent_records(struct btrfs_trans_handle *trans,
6528                                  struct btrfs_root *root,
6529                                  struct btrfs_path *path,
6530                                  u64 bytenr, u64 new_len)
6531 {
6532         struct btrfs_key key;
6533         struct btrfs_key found_key;
6534         struct extent_buffer *leaf;
6535         int ret;
6536         int slot;
6537
6538
6539         key.objectid = bytenr;
6540         key.type = (u8)-1;
6541         key.offset = (u64)-1;
6542
6543         while(1) {
6544                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6545                                         &key, path, 0, 1);
6546                 if (ret < 0)
6547                         break;
6548
6549                 if (ret > 0) {
6550                         ret = 0;
6551                         if (path->slots[0] == 0)
6552                                 break;
6553                         path->slots[0]--;
6554                 }
6555                 ret = 0;
6556
6557                 leaf = path->nodes[0];
6558                 slot = path->slots[0];
6559
6560                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6561                 if (found_key.objectid != bytenr)
6562                         break;
6563
6564                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6565                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6566                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6567                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6568                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6569                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6570                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6571                         btrfs_release_path(path);
6572                         if (found_key.type == 0) {
6573                                 if (found_key.offset == 0)
6574                                         break;
6575                                 key.offset = found_key.offset - 1;
6576                                 key.type = found_key.type;
6577                         }
6578                         key.type = found_key.type - 1;
6579                         key.offset = (u64)-1;
6580                         continue;
6581                 }
6582
6583                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6584                         found_key.objectid, found_key.type, found_key.offset);
6585
6586                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6587                 if (ret)
6588                         break;
6589                 btrfs_release_path(path);
6590
6591                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6592                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6593                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6594                                 found_key.offset : root->nodesize;
6595
6596                         ret = btrfs_update_block_group(trans, root, bytenr,
6597                                                        bytes, 0, 0);
6598                         if (ret)
6599                                 break;
6600                 }
6601         }
6602
6603         btrfs_release_path(path);
6604         return ret;
6605 }
6606
6607 /*
6608  * for a single backref, this will allocate a new extent
6609  * and add the backref to it.
6610  */
6611 static int record_extent(struct btrfs_trans_handle *trans,
6612                          struct btrfs_fs_info *info,
6613                          struct btrfs_path *path,
6614                          struct extent_record *rec,
6615                          struct extent_backref *back,
6616                          int allocated, u64 flags)
6617 {
6618         int ret;
6619         struct btrfs_root *extent_root = info->extent_root;
6620         struct extent_buffer *leaf;
6621         struct btrfs_key ins_key;
6622         struct btrfs_extent_item *ei;
6623         struct tree_backref *tback;
6624         struct data_backref *dback;
6625         struct btrfs_tree_block_info *bi;
6626
6627         if (!back->is_data)
6628                 rec->max_size = max_t(u64, rec->max_size,
6629                                     info->extent_root->nodesize);
6630
6631         if (!allocated) {
6632                 u32 item_size = sizeof(*ei);
6633
6634                 if (!back->is_data)
6635                         item_size += sizeof(*bi);
6636
6637                 ins_key.objectid = rec->start;
6638                 ins_key.offset = rec->max_size;
6639                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6640
6641                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6642                                         &ins_key, item_size);
6643                 if (ret)
6644                         goto fail;
6645
6646                 leaf = path->nodes[0];
6647                 ei = btrfs_item_ptr(leaf, path->slots[0],
6648                                     struct btrfs_extent_item);
6649
6650                 btrfs_set_extent_refs(leaf, ei, 0);
6651                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6652
6653                 if (back->is_data) {
6654                         btrfs_set_extent_flags(leaf, ei,
6655                                                BTRFS_EXTENT_FLAG_DATA);
6656                 } else {
6657                         struct btrfs_disk_key copy_key;;
6658
6659                         tback = to_tree_backref(back);
6660                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6661                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6662                                              sizeof(*bi));
6663
6664                         btrfs_set_disk_key_objectid(&copy_key,
6665                                                     rec->info_objectid);
6666                         btrfs_set_disk_key_type(&copy_key, 0);
6667                         btrfs_set_disk_key_offset(&copy_key, 0);
6668
6669                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6670                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6671
6672                         btrfs_set_extent_flags(leaf, ei,
6673                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6674                 }
6675
6676                 btrfs_mark_buffer_dirty(leaf);
6677                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6678                                                rec->max_size, 1, 0);
6679                 if (ret)
6680                         goto fail;
6681                 btrfs_release_path(path);
6682         }
6683
6684         if (back->is_data) {
6685                 u64 parent;
6686                 int i;
6687
6688                 dback = to_data_backref(back);
6689                 if (back->full_backref)
6690                         parent = dback->parent;
6691                 else
6692                         parent = 0;
6693
6694                 for (i = 0; i < dback->found_ref; i++) {
6695                         /* if parent != 0, we're doing a full backref
6696                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6697                          * just makes the backref allocator create a data
6698                          * backref
6699                          */
6700                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6701                                                    rec->start, rec->max_size,
6702                                                    parent,
6703                                                    dback->root,
6704                                                    parent ?
6705                                                    BTRFS_FIRST_FREE_OBJECTID :
6706                                                    dback->owner,
6707                                                    dback->offset);
6708                         if (ret)
6709                                 break;
6710                 }
6711                 fprintf(stderr, "adding new data backref"
6712                                 " on %llu %s %llu owner %llu"
6713                                 " offset %llu found %d\n",
6714                                 (unsigned long long)rec->start,
6715                                 back->full_backref ?
6716                                 "parent" : "root",
6717                                 back->full_backref ?
6718                                 (unsigned long long)parent :
6719                                 (unsigned long long)dback->root,
6720                                 (unsigned long long)dback->owner,
6721                                 (unsigned long long)dback->offset,
6722                                 dback->found_ref);
6723         } else {
6724                 u64 parent;
6725
6726                 tback = to_tree_backref(back);
6727                 if (back->full_backref)
6728                         parent = tback->parent;
6729                 else
6730                         parent = 0;
6731
6732                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6733                                            rec->start, rec->max_size,
6734                                            parent, tback->root, 0, 0);
6735                 fprintf(stderr, "adding new tree backref on "
6736                         "start %llu len %llu parent %llu root %llu\n",
6737                         rec->start, rec->max_size, parent, tback->root);
6738         }
6739 fail:
6740         btrfs_release_path(path);
6741         return ret;
6742 }
6743
6744 static struct extent_entry *find_entry(struct list_head *entries,
6745                                        u64 bytenr, u64 bytes)
6746 {
6747         struct extent_entry *entry = NULL;
6748
6749         list_for_each_entry(entry, entries, list) {
6750                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6751                         return entry;
6752         }
6753
6754         return NULL;
6755 }
6756
6757 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6758 {
6759         struct extent_entry *entry, *best = NULL, *prev = NULL;
6760
6761         list_for_each_entry(entry, entries, list) {
6762                 if (!prev) {
6763                         prev = entry;
6764                         continue;
6765                 }
6766
6767                 /*
6768                  * If there are as many broken entries as entries then we know
6769                  * not to trust this particular entry.
6770                  */
6771                 if (entry->broken == entry->count)
6772                         continue;
6773
6774                 /*
6775                  * If our current entry == best then we can't be sure our best
6776                  * is really the best, so we need to keep searching.
6777                  */
6778                 if (best && best->count == entry->count) {
6779                         prev = entry;
6780                         best = NULL;
6781                         continue;
6782                 }
6783
6784                 /* Prev == entry, not good enough, have to keep searching */
6785                 if (!prev->broken && prev->count == entry->count)
6786                         continue;
6787
6788                 if (!best)
6789                         best = (prev->count > entry->count) ? prev : entry;
6790                 else if (best->count < entry->count)
6791                         best = entry;
6792                 prev = entry;
6793         }
6794
6795         return best;
6796 }
6797
6798 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6799                       struct data_backref *dback, struct extent_entry *entry)
6800 {
6801         struct btrfs_trans_handle *trans;
6802         struct btrfs_root *root;
6803         struct btrfs_file_extent_item *fi;
6804         struct extent_buffer *leaf;
6805         struct btrfs_key key;
6806         u64 bytenr, bytes;
6807         int ret, err;
6808
6809         key.objectid = dback->root;
6810         key.type = BTRFS_ROOT_ITEM_KEY;
6811         key.offset = (u64)-1;
6812         root = btrfs_read_fs_root(info, &key);
6813         if (IS_ERR(root)) {
6814                 fprintf(stderr, "Couldn't find root for our ref\n");
6815                 return -EINVAL;
6816         }
6817
6818         /*
6819          * The backref points to the original offset of the extent if it was
6820          * split, so we need to search down to the offset we have and then walk
6821          * forward until we find the backref we're looking for.
6822          */
6823         key.objectid = dback->owner;
6824         key.type = BTRFS_EXTENT_DATA_KEY;
6825         key.offset = dback->offset;
6826         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6827         if (ret < 0) {
6828                 fprintf(stderr, "Error looking up ref %d\n", ret);
6829                 return ret;
6830         }
6831
6832         while (1) {
6833                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6834                         ret = btrfs_next_leaf(root, path);
6835                         if (ret) {
6836                                 fprintf(stderr, "Couldn't find our ref, next\n");
6837                                 return -EINVAL;
6838                         }
6839                 }
6840                 leaf = path->nodes[0];
6841                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6842                 if (key.objectid != dback->owner ||
6843                     key.type != BTRFS_EXTENT_DATA_KEY) {
6844                         fprintf(stderr, "Couldn't find our ref, search\n");
6845                         return -EINVAL;
6846                 }
6847                 fi = btrfs_item_ptr(leaf, path->slots[0],
6848                                     struct btrfs_file_extent_item);
6849                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6850                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6851
6852                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6853                         break;
6854                 path->slots[0]++;
6855         }
6856
6857         btrfs_release_path(path);
6858
6859         trans = btrfs_start_transaction(root, 1);
6860         if (IS_ERR(trans))
6861                 return PTR_ERR(trans);
6862
6863         /*
6864          * Ok we have the key of the file extent we want to fix, now we can cow
6865          * down to the thing and fix it.
6866          */
6867         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6868         if (ret < 0) {
6869                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6870                         key.objectid, key.type, key.offset, ret);
6871                 goto out;
6872         }
6873         if (ret > 0) {
6874                 fprintf(stderr, "Well that's odd, we just found this key "
6875                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6876                         key.offset);
6877                 ret = -EINVAL;
6878                 goto out;
6879         }
6880         leaf = path->nodes[0];
6881         fi = btrfs_item_ptr(leaf, path->slots[0],
6882                             struct btrfs_file_extent_item);
6883
6884         if (btrfs_file_extent_compression(leaf, fi) &&
6885             dback->disk_bytenr != entry->bytenr) {
6886                 fprintf(stderr, "Ref doesn't match the record start and is "
6887                         "compressed, please take a btrfs-image of this file "
6888                         "system and send it to a btrfs developer so they can "
6889                         "complete this functionality for bytenr %Lu\n",
6890                         dback->disk_bytenr);
6891                 ret = -EINVAL;
6892                 goto out;
6893         }
6894
6895         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6896                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6897         } else if (dback->disk_bytenr > entry->bytenr) {
6898                 u64 off_diff, offset;
6899
6900                 off_diff = dback->disk_bytenr - entry->bytenr;
6901                 offset = btrfs_file_extent_offset(leaf, fi);
6902                 if (dback->disk_bytenr + offset +
6903                     btrfs_file_extent_num_bytes(leaf, fi) >
6904                     entry->bytenr + entry->bytes) {
6905                         fprintf(stderr, "Ref is past the entry end, please "
6906                                 "take a btrfs-image of this file system and "
6907                                 "send it to a btrfs developer, ref %Lu\n",
6908                                 dback->disk_bytenr);
6909                         ret = -EINVAL;
6910                         goto out;
6911                 }
6912                 offset += off_diff;
6913                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6914                 btrfs_set_file_extent_offset(leaf, fi, offset);
6915         } else if (dback->disk_bytenr < entry->bytenr) {
6916                 u64 offset;
6917
6918                 offset = btrfs_file_extent_offset(leaf, fi);
6919                 if (dback->disk_bytenr + offset < entry->bytenr) {
6920                         fprintf(stderr, "Ref is before the entry start, please"
6921                                 " take a btrfs-image of this file system and "
6922                                 "send it to a btrfs developer, ref %Lu\n",
6923                                 dback->disk_bytenr);
6924                         ret = -EINVAL;
6925                         goto out;
6926                 }
6927
6928                 offset += dback->disk_bytenr;
6929                 offset -= entry->bytenr;
6930                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6931                 btrfs_set_file_extent_offset(leaf, fi, offset);
6932         }
6933
6934         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6935
6936         /*
6937          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6938          * only do this if we aren't using compression, otherwise it's a
6939          * trickier case.
6940          */
6941         if (!btrfs_file_extent_compression(leaf, fi))
6942                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6943         else
6944                 printf("ram bytes may be wrong?\n");
6945         btrfs_mark_buffer_dirty(leaf);
6946 out:
6947         err = btrfs_commit_transaction(trans, root);
6948         btrfs_release_path(path);
6949         return ret ? ret : err;
6950 }
6951
6952 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6953                            struct extent_record *rec)
6954 {
6955         struct extent_backref *back, *tmp;
6956         struct data_backref *dback;
6957         struct extent_entry *entry, *best = NULL;
6958         LIST_HEAD(entries);
6959         int nr_entries = 0;
6960         int broken_entries = 0;
6961         int ret = 0;
6962         short mismatch = 0;
6963
6964         /*
6965          * Metadata is easy and the backrefs should always agree on bytenr and
6966          * size, if not we've got bigger issues.
6967          */
6968         if (rec->metadata)
6969                 return 0;
6970
6971         rbtree_postorder_for_each_entry_safe(back, tmp,
6972                                              &rec->backref_tree, node) {
6973                 if (back->full_backref || !back->is_data)
6974                         continue;
6975
6976                 dback = to_data_backref(back);
6977
6978                 /*
6979                  * We only pay attention to backrefs that we found a real
6980                  * backref for.
6981                  */
6982                 if (dback->found_ref == 0)
6983                         continue;
6984
6985                 /*
6986                  * For now we only catch when the bytes don't match, not the
6987                  * bytenr.  We can easily do this at the same time, but I want
6988                  * to have a fs image to test on before we just add repair
6989                  * functionality willy-nilly so we know we won't screw up the
6990                  * repair.
6991                  */
6992
6993                 entry = find_entry(&entries, dback->disk_bytenr,
6994                                    dback->bytes);
6995                 if (!entry) {
6996                         entry = malloc(sizeof(struct extent_entry));
6997                         if (!entry) {
6998                                 ret = -ENOMEM;
6999                                 goto out;
7000                         }
7001                         memset(entry, 0, sizeof(*entry));
7002                         entry->bytenr = dback->disk_bytenr;
7003                         entry->bytes = dback->bytes;
7004                         list_add_tail(&entry->list, &entries);
7005                         nr_entries++;
7006                 }
7007
7008                 /*
7009                  * If we only have on entry we may think the entries agree when
7010                  * in reality they don't so we have to do some extra checking.
7011                  */
7012                 if (dback->disk_bytenr != rec->start ||
7013                     dback->bytes != rec->nr || back->broken)
7014                         mismatch = 1;
7015
7016                 if (back->broken) {
7017                         entry->broken++;
7018                         broken_entries++;
7019                 }
7020
7021                 entry->count++;
7022         }
7023
7024         /* Yay all the backrefs agree, carry on good sir */
7025         if (nr_entries <= 1 && !mismatch)
7026                 goto out;
7027
7028         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7029                 "%Lu\n", rec->start);
7030
7031         /*
7032          * First we want to see if the backrefs can agree amongst themselves who
7033          * is right, so figure out which one of the entries has the highest
7034          * count.
7035          */
7036         best = find_most_right_entry(&entries);
7037
7038         /*
7039          * Ok so we may have an even split between what the backrefs think, so
7040          * this is where we use the extent ref to see what it thinks.
7041          */
7042         if (!best) {
7043                 entry = find_entry(&entries, rec->start, rec->nr);
7044                 if (!entry && (!broken_entries || !rec->found_rec)) {
7045                         fprintf(stderr, "Backrefs don't agree with each other "
7046                                 "and extent record doesn't agree with anybody,"
7047                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7048                                 rec->start, rec->nr);
7049                         ret = -EINVAL;
7050                         goto out;
7051                 } else if (!entry) {
7052                         /*
7053                          * Ok our backrefs were broken, we'll assume this is the
7054                          * correct value and add an entry for this range.
7055                          */
7056                         entry = malloc(sizeof(struct extent_entry));
7057                         if (!entry) {
7058                                 ret = -ENOMEM;
7059                                 goto out;
7060                         }
7061                         memset(entry, 0, sizeof(*entry));
7062                         entry->bytenr = rec->start;
7063                         entry->bytes = rec->nr;
7064                         list_add_tail(&entry->list, &entries);
7065                         nr_entries++;
7066                 }
7067                 entry->count++;
7068                 best = find_most_right_entry(&entries);
7069                 if (!best) {
7070                         fprintf(stderr, "Backrefs and extent record evenly "
7071                                 "split on who is right, this is going to "
7072                                 "require user input to fix bytenr %Lu bytes "
7073                                 "%Lu\n", rec->start, rec->nr);
7074                         ret = -EINVAL;
7075                         goto out;
7076                 }
7077         }
7078
7079         /*
7080          * I don't think this can happen currently as we'll abort() if we catch
7081          * this case higher up, but in case somebody removes that we still can't
7082          * deal with it properly here yet, so just bail out of that's the case.
7083          */
7084         if (best->bytenr != rec->start) {
7085                 fprintf(stderr, "Extent start and backref starts don't match, "
7086                         "please use btrfs-image on this file system and send "
7087                         "it to a btrfs developer so they can make fsck fix "
7088                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7089                         rec->start, rec->nr);
7090                 ret = -EINVAL;
7091                 goto out;
7092         }
7093
7094         /*
7095          * Ok great we all agreed on an extent record, let's go find the real
7096          * references and fix up the ones that don't match.
7097          */
7098         rbtree_postorder_for_each_entry_safe(back, tmp,
7099                                              &rec->backref_tree, node) {
7100                 if (back->full_backref || !back->is_data)
7101                         continue;
7102
7103                 dback = to_data_backref(back);
7104
7105                 /*
7106                  * Still ignoring backrefs that don't have a real ref attached
7107                  * to them.
7108                  */
7109                 if (dback->found_ref == 0)
7110                         continue;
7111
7112                 if (dback->bytes == best->bytes &&
7113                     dback->disk_bytenr == best->bytenr)
7114                         continue;
7115
7116                 ret = repair_ref(info, path, dback, best);
7117                 if (ret)
7118                         goto out;
7119         }
7120
7121         /*
7122          * Ok we messed with the actual refs, which means we need to drop our
7123          * entire cache and go back and rescan.  I know this is a huge pain and
7124          * adds a lot of extra work, but it's the only way to be safe.  Once all
7125          * the backrefs agree we may not need to do anything to the extent
7126          * record itself.
7127          */
7128         ret = -EAGAIN;
7129 out:
7130         while (!list_empty(&entries)) {
7131                 entry = list_entry(entries.next, struct extent_entry, list);
7132                 list_del_init(&entry->list);
7133                 free(entry);
7134         }
7135         return ret;
7136 }
7137
7138 static int process_duplicates(struct btrfs_root *root,
7139                               struct cache_tree *extent_cache,
7140                               struct extent_record *rec)
7141 {
7142         struct extent_record *good, *tmp;
7143         struct cache_extent *cache;
7144         int ret;
7145
7146         /*
7147          * If we found a extent record for this extent then return, or if we
7148          * have more than one duplicate we are likely going to need to delete
7149          * something.
7150          */
7151         if (rec->found_rec || rec->num_duplicates > 1)
7152                 return 0;
7153
7154         /* Shouldn't happen but just in case */
7155         BUG_ON(!rec->num_duplicates);
7156
7157         /*
7158          * So this happens if we end up with a backref that doesn't match the
7159          * actual extent entry.  So either the backref is bad or the extent
7160          * entry is bad.  Either way we want to have the extent_record actually
7161          * reflect what we found in the extent_tree, so we need to take the
7162          * duplicate out and use that as the extent_record since the only way we
7163          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7164          */
7165         remove_cache_extent(extent_cache, &rec->cache);
7166
7167         good = to_extent_record(rec->dups.next);
7168         list_del_init(&good->list);
7169         INIT_LIST_HEAD(&good->backrefs);
7170         INIT_LIST_HEAD(&good->dups);
7171         good->cache.start = good->start;
7172         good->cache.size = good->nr;
7173         good->content_checked = 0;
7174         good->owner_ref_checked = 0;
7175         good->num_duplicates = 0;
7176         good->refs = rec->refs;
7177         list_splice_init(&rec->backrefs, &good->backrefs);
7178         while (1) {
7179                 cache = lookup_cache_extent(extent_cache, good->start,
7180                                             good->nr);
7181                 if (!cache)
7182                         break;
7183                 tmp = container_of(cache, struct extent_record, cache);
7184
7185                 /*
7186                  * If we find another overlapping extent and it's found_rec is
7187                  * set then it's a duplicate and we need to try and delete
7188                  * something.
7189                  */
7190                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7191                         if (list_empty(&good->list))
7192                                 list_add_tail(&good->list,
7193                                               &duplicate_extents);
7194                         good->num_duplicates += tmp->num_duplicates + 1;
7195                         list_splice_init(&tmp->dups, &good->dups);
7196                         list_del_init(&tmp->list);
7197                         list_add_tail(&tmp->list, &good->dups);
7198                         remove_cache_extent(extent_cache, &tmp->cache);
7199                         continue;
7200                 }
7201
7202                 /*
7203                  * Ok we have another non extent item backed extent rec, so lets
7204                  * just add it to this extent and carry on like we did above.
7205                  */
7206                 good->refs += tmp->refs;
7207                 list_splice_init(&tmp->backrefs, &good->backrefs);
7208                 remove_cache_extent(extent_cache, &tmp->cache);
7209                 free(tmp);
7210         }
7211         ret = insert_cache_extent(extent_cache, &good->cache);
7212         BUG_ON(ret);
7213         free(rec);
7214         return good->num_duplicates ? 0 : 1;
7215 }
7216
7217 static int delete_duplicate_records(struct btrfs_root *root,
7218                                     struct extent_record *rec)
7219 {
7220         struct btrfs_trans_handle *trans;
7221         LIST_HEAD(delete_list);
7222         struct btrfs_path *path;
7223         struct extent_record *tmp, *good, *n;
7224         int nr_del = 0;
7225         int ret = 0, err;
7226         struct btrfs_key key;
7227
7228         path = btrfs_alloc_path();
7229         if (!path) {
7230                 ret = -ENOMEM;
7231                 goto out;
7232         }
7233
7234         good = rec;
7235         /* Find the record that covers all of the duplicates. */
7236         list_for_each_entry(tmp, &rec->dups, list) {
7237                 if (good->start < tmp->start)
7238                         continue;
7239                 if (good->nr > tmp->nr)
7240                         continue;
7241
7242                 if (tmp->start + tmp->nr < good->start + good->nr) {
7243                         fprintf(stderr, "Ok we have overlapping extents that "
7244                                 "aren't completely covered by each other, this "
7245                                 "is going to require more careful thought.  "
7246                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7247                                 tmp->start, tmp->nr, good->start, good->nr);
7248                         abort();
7249                 }
7250                 good = tmp;
7251         }
7252
7253         if (good != rec)
7254                 list_add_tail(&rec->list, &delete_list);
7255
7256         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7257                 if (tmp == good)
7258                         continue;
7259                 list_move_tail(&tmp->list, &delete_list);
7260         }
7261
7262         root = root->fs_info->extent_root;
7263         trans = btrfs_start_transaction(root, 1);
7264         if (IS_ERR(trans)) {
7265                 ret = PTR_ERR(trans);
7266                 goto out;
7267         }
7268
7269         list_for_each_entry(tmp, &delete_list, list) {
7270                 if (tmp->found_rec == 0)
7271                         continue;
7272                 key.objectid = tmp->start;
7273                 key.type = BTRFS_EXTENT_ITEM_KEY;
7274                 key.offset = tmp->nr;
7275
7276                 /* Shouldn't happen but just in case */
7277                 if (tmp->metadata) {
7278                         fprintf(stderr, "Well this shouldn't happen, extent "
7279                                 "record overlaps but is metadata? "
7280                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7281                         abort();
7282                 }
7283
7284                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7285                 if (ret) {
7286                         if (ret > 0)
7287                                 ret = -EINVAL;
7288                         break;
7289                 }
7290                 ret = btrfs_del_item(trans, root, path);
7291                 if (ret)
7292                         break;
7293                 btrfs_release_path(path);
7294                 nr_del++;
7295         }
7296         err = btrfs_commit_transaction(trans, root);
7297         if (err && !ret)
7298                 ret = err;
7299 out:
7300         while (!list_empty(&delete_list)) {
7301                 tmp = to_extent_record(delete_list.next);
7302                 list_del_init(&tmp->list);
7303                 if (tmp == rec)
7304                         continue;
7305                 free(tmp);
7306         }
7307
7308         while (!list_empty(&rec->dups)) {
7309                 tmp = to_extent_record(rec->dups.next);
7310                 list_del_init(&tmp->list);
7311                 free(tmp);
7312         }
7313
7314         btrfs_free_path(path);
7315
7316         if (!ret && !nr_del)
7317                 rec->num_duplicates = 0;
7318
7319         return ret ? ret : nr_del;
7320 }
7321
7322 static int find_possible_backrefs(struct btrfs_fs_info *info,
7323                                   struct btrfs_path *path,
7324                                   struct cache_tree *extent_cache,
7325                                   struct extent_record *rec)
7326 {
7327         struct btrfs_root *root;
7328         struct extent_backref *back, *tmp;
7329         struct data_backref *dback;
7330         struct cache_extent *cache;
7331         struct btrfs_file_extent_item *fi;
7332         struct btrfs_key key;
7333         u64 bytenr, bytes;
7334         int ret;
7335
7336         rbtree_postorder_for_each_entry_safe(back, tmp,
7337                                              &rec->backref_tree, node) {
7338                 /* Don't care about full backrefs (poor unloved backrefs) */
7339                 if (back->full_backref || !back->is_data)
7340                         continue;
7341
7342                 dback = to_data_backref(back);
7343
7344                 /* We found this one, we don't need to do a lookup */
7345                 if (dback->found_ref)
7346                         continue;
7347
7348                 key.objectid = dback->root;
7349                 key.type = BTRFS_ROOT_ITEM_KEY;
7350                 key.offset = (u64)-1;
7351
7352                 root = btrfs_read_fs_root(info, &key);
7353
7354                 /* No root, definitely a bad ref, skip */
7355                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7356                         continue;
7357                 /* Other err, exit */
7358                 if (IS_ERR(root))
7359                         return PTR_ERR(root);
7360
7361                 key.objectid = dback->owner;
7362                 key.type = BTRFS_EXTENT_DATA_KEY;
7363                 key.offset = dback->offset;
7364                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7365                 if (ret) {
7366                         btrfs_release_path(path);
7367                         if (ret < 0)
7368                                 return ret;
7369                         /* Didn't find it, we can carry on */
7370                         ret = 0;
7371                         continue;
7372                 }
7373
7374                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7375                                     struct btrfs_file_extent_item);
7376                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7377                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7378                 btrfs_release_path(path);
7379                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7380                 if (cache) {
7381                         struct extent_record *tmp;
7382                         tmp = container_of(cache, struct extent_record, cache);
7383
7384                         /*
7385                          * If we found an extent record for the bytenr for this
7386                          * particular backref then we can't add it to our
7387                          * current extent record.  We only want to add backrefs
7388                          * that don't have a corresponding extent item in the
7389                          * extent tree since they likely belong to this record
7390                          * and we need to fix it if it doesn't match bytenrs.
7391                          */
7392                         if  (tmp->found_rec)
7393                                 continue;
7394                 }
7395
7396                 dback->found_ref += 1;
7397                 dback->disk_bytenr = bytenr;
7398                 dback->bytes = bytes;
7399
7400                 /*
7401                  * Set this so the verify backref code knows not to trust the
7402                  * values in this backref.
7403                  */
7404                 back->broken = 1;
7405         }
7406
7407         return 0;
7408 }
7409
7410 /*
7411  * Record orphan data ref into corresponding root.
7412  *
7413  * Return 0 if the extent item contains data ref and recorded.
7414  * Return 1 if the extent item contains no useful data ref
7415  *   On that case, it may contains only shared_dataref or metadata backref
7416  *   or the file extent exists(this should be handled by the extent bytenr
7417  *   recovery routine)
7418  * Return <0 if something goes wrong.
7419  */
7420 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7421                                       struct extent_record *rec)
7422 {
7423         struct btrfs_key key;
7424         struct btrfs_root *dest_root;
7425         struct extent_backref *back, *tmp;
7426         struct data_backref *dback;
7427         struct orphan_data_extent *orphan;
7428         struct btrfs_path *path;
7429         int recorded_data_ref = 0;
7430         int ret = 0;
7431
7432         if (rec->metadata)
7433                 return 1;
7434         path = btrfs_alloc_path();
7435         if (!path)
7436                 return -ENOMEM;
7437         rbtree_postorder_for_each_entry_safe(back, tmp,
7438                                              &rec->backref_tree, node) {
7439                 if (back->full_backref || !back->is_data ||
7440                     !back->found_extent_tree)
7441                         continue;
7442                 dback = to_data_backref(back);
7443                 if (dback->found_ref)
7444                         continue;
7445                 key.objectid = dback->root;
7446                 key.type = BTRFS_ROOT_ITEM_KEY;
7447                 key.offset = (u64)-1;
7448
7449                 dest_root = btrfs_read_fs_root(fs_info, &key);
7450
7451                 /* For non-exist root we just skip it */
7452                 if (IS_ERR(dest_root) || !dest_root)
7453                         continue;
7454
7455                 key.objectid = dback->owner;
7456                 key.type = BTRFS_EXTENT_DATA_KEY;
7457                 key.offset = dback->offset;
7458
7459                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7460                 /*
7461                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7462                  * we need to record it for inode/file extent rebuild.
7463                  * For ret > 0, we record it only for file extent rebuild.
7464                  * For ret == 0, the file extent exists but only bytenr
7465                  * mismatch, let the original bytenr fix routine to handle,
7466                  * don't record it.
7467                  */
7468                 if (ret == 0)
7469                         continue;
7470                 ret = 0;
7471                 orphan = malloc(sizeof(*orphan));
7472                 if (!orphan) {
7473                         ret = -ENOMEM;
7474                         goto out;
7475                 }
7476                 INIT_LIST_HEAD(&orphan->list);
7477                 orphan->root = dback->root;
7478                 orphan->objectid = dback->owner;
7479                 orphan->offset = dback->offset;
7480                 orphan->disk_bytenr = rec->cache.start;
7481                 orphan->disk_len = rec->cache.size;
7482                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7483                 recorded_data_ref = 1;
7484         }
7485 out:
7486         btrfs_free_path(path);
7487         if (!ret)
7488                 return !recorded_data_ref;
7489         else
7490                 return ret;
7491 }
7492
7493 /*
7494  * when an incorrect extent item is found, this will delete
7495  * all of the existing entries for it and recreate them
7496  * based on what the tree scan found.
7497  */
7498 static int fixup_extent_refs(struct btrfs_fs_info *info,
7499                              struct cache_tree *extent_cache,
7500                              struct extent_record *rec)
7501 {
7502         struct btrfs_trans_handle *trans = NULL;
7503         int ret;
7504         struct btrfs_path *path;
7505         struct cache_extent *cache;
7506         struct extent_backref *back, *tmp;
7507         int allocated = 0;
7508         u64 flags = 0;
7509
7510         if (rec->flag_block_full_backref)
7511                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7512
7513         path = btrfs_alloc_path();
7514         if (!path)
7515                 return -ENOMEM;
7516
7517         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7518                 /*
7519                  * Sometimes the backrefs themselves are so broken they don't
7520                  * get attached to any meaningful rec, so first go back and
7521                  * check any of our backrefs that we couldn't find and throw
7522                  * them into the list if we find the backref so that
7523                  * verify_backrefs can figure out what to do.
7524                  */
7525                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7526                 if (ret < 0)
7527                         goto out;
7528         }
7529
7530         /* step one, make sure all of the backrefs agree */
7531         ret = verify_backrefs(info, path, rec);
7532         if (ret < 0)
7533                 goto out;
7534
7535         trans = btrfs_start_transaction(info->extent_root, 1);
7536         if (IS_ERR(trans)) {
7537                 ret = PTR_ERR(trans);
7538                 goto out;
7539         }
7540
7541         /* step two, delete all the existing records */
7542         ret = delete_extent_records(trans, info->extent_root, path,
7543                                     rec->start, rec->max_size);
7544
7545         if (ret < 0)
7546                 goto out;
7547
7548         /* was this block corrupt?  If so, don't add references to it */
7549         cache = lookup_cache_extent(info->corrupt_blocks,
7550                                     rec->start, rec->max_size);
7551         if (cache) {
7552                 ret = 0;
7553                 goto out;
7554         }
7555
7556         /* step three, recreate all the refs we did find */
7557         rbtree_postorder_for_each_entry_safe(back, tmp,
7558                                              &rec->backref_tree, node) {
7559                 /*
7560                  * if we didn't find any references, don't create a
7561                  * new extent record
7562                  */
7563                 if (!back->found_ref)
7564                         continue;
7565
7566                 rec->bad_full_backref = 0;
7567                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7568                 allocated = 1;
7569
7570                 if (ret)
7571                         goto out;
7572         }
7573 out:
7574         if (trans) {
7575                 int err = btrfs_commit_transaction(trans, info->extent_root);
7576                 if (!ret)
7577                         ret = err;
7578         }
7579
7580         btrfs_free_path(path);
7581         return ret;
7582 }
7583
7584 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7585                               struct extent_record *rec)
7586 {
7587         struct btrfs_trans_handle *trans;
7588         struct btrfs_root *root = fs_info->extent_root;
7589         struct btrfs_path *path;
7590         struct btrfs_extent_item *ei;
7591         struct btrfs_key key;
7592         u64 flags;
7593         int ret = 0;
7594
7595         key.objectid = rec->start;
7596         if (rec->metadata) {
7597                 key.type = BTRFS_METADATA_ITEM_KEY;
7598                 key.offset = rec->info_level;
7599         } else {
7600                 key.type = BTRFS_EXTENT_ITEM_KEY;
7601                 key.offset = rec->max_size;
7602         }
7603
7604         path = btrfs_alloc_path();
7605         if (!path)
7606                 return -ENOMEM;
7607
7608         trans = btrfs_start_transaction(root, 0);
7609         if (IS_ERR(trans)) {
7610                 btrfs_free_path(path);
7611                 return PTR_ERR(trans);
7612         }
7613
7614         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7615         if (ret < 0) {
7616                 btrfs_free_path(path);
7617                 btrfs_commit_transaction(trans, root);
7618                 return ret;
7619         } else if (ret) {
7620                 fprintf(stderr, "Didn't find extent for %llu\n",
7621                         (unsigned long long)rec->start);
7622                 btrfs_free_path(path);
7623                 btrfs_commit_transaction(trans, root);
7624                 return -ENOENT;
7625         }
7626
7627         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7628                             struct btrfs_extent_item);
7629         flags = btrfs_extent_flags(path->nodes[0], ei);
7630         if (rec->flag_block_full_backref) {
7631                 fprintf(stderr, "setting full backref on %llu\n",
7632                         (unsigned long long)key.objectid);
7633                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7634         } else {
7635                 fprintf(stderr, "clearing full backref on %llu\n",
7636                         (unsigned long long)key.objectid);
7637                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7638         }
7639         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7640         btrfs_mark_buffer_dirty(path->nodes[0]);
7641         btrfs_free_path(path);
7642         return btrfs_commit_transaction(trans, root);
7643 }
7644
7645 /* right now we only prune from the extent allocation tree */
7646 static int prune_one_block(struct btrfs_trans_handle *trans,
7647                            struct btrfs_fs_info *info,
7648                            struct btrfs_corrupt_block *corrupt)
7649 {
7650         int ret;
7651         struct btrfs_path path;
7652         struct extent_buffer *eb;
7653         u64 found;
7654         int slot;
7655         int nritems;
7656         int level = corrupt->level + 1;
7657
7658         btrfs_init_path(&path);
7659 again:
7660         /* we want to stop at the parent to our busted block */
7661         path.lowest_level = level;
7662
7663         ret = btrfs_search_slot(trans, info->extent_root,
7664                                 &corrupt->key, &path, -1, 1);
7665
7666         if (ret < 0)
7667                 goto out;
7668
7669         eb = path.nodes[level];
7670         if (!eb) {
7671                 ret = -ENOENT;
7672                 goto out;
7673         }
7674
7675         /*
7676          * hopefully the search gave us the block we want to prune,
7677          * lets try that first
7678          */
7679         slot = path.slots[level];
7680         found =  btrfs_node_blockptr(eb, slot);
7681         if (found == corrupt->cache.start)
7682                 goto del_ptr;
7683
7684         nritems = btrfs_header_nritems(eb);
7685
7686         /* the search failed, lets scan this node and hope we find it */
7687         for (slot = 0; slot < nritems; slot++) {
7688                 found =  btrfs_node_blockptr(eb, slot);
7689                 if (found == corrupt->cache.start)
7690                         goto del_ptr;
7691         }
7692         /*
7693          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7694          * to this block
7695          */
7696         if (eb == info->extent_root->node) {
7697                 ret = -ENOENT;
7698                 goto out;
7699         } else {
7700                 level++;
7701                 btrfs_release_path(&path);
7702                 goto again;
7703         }
7704
7705 del_ptr:
7706         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7707         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7708
7709 out:
7710         btrfs_release_path(&path);
7711         return ret;
7712 }
7713
7714 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7715 {
7716         struct btrfs_trans_handle *trans = NULL;
7717         struct cache_extent *cache;
7718         struct btrfs_corrupt_block *corrupt;
7719
7720         while (1) {
7721                 cache = search_cache_extent(info->corrupt_blocks, 0);
7722                 if (!cache)
7723                         break;
7724                 if (!trans) {
7725                         trans = btrfs_start_transaction(info->extent_root, 1);
7726                         if (IS_ERR(trans))
7727                                 return PTR_ERR(trans);
7728                 }
7729                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7730                 prune_one_block(trans, info, corrupt);
7731                 remove_cache_extent(info->corrupt_blocks, cache);
7732         }
7733         if (trans)
7734                 return btrfs_commit_transaction(trans, info->extent_root);
7735         return 0;
7736 }
7737
7738 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7739 {
7740         struct btrfs_block_group_cache *cache;
7741         u64 start, end;
7742         int ret;
7743
7744         while (1) {
7745                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7746                                             &start, &end, EXTENT_DIRTY);
7747                 if (ret)
7748                         break;
7749                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7750                                    GFP_NOFS);
7751         }
7752
7753         start = 0;
7754         while (1) {
7755                 cache = btrfs_lookup_first_block_group(fs_info, start);
7756                 if (!cache)
7757                         break;
7758                 if (cache->cached)
7759                         cache->cached = 0;
7760                 start = cache->key.objectid + cache->key.offset;
7761         }
7762 }
7763
7764 static int check_extent_refs(struct btrfs_root *root,
7765                              struct cache_tree *extent_cache)
7766 {
7767         struct extent_record *rec;
7768         struct cache_extent *cache;
7769         int err = 0;
7770         int ret = 0;
7771         int fixed = 0;
7772         int had_dups = 0;
7773         int recorded = 0;
7774
7775         if (repair) {
7776                 /*
7777                  * if we're doing a repair, we have to make sure
7778                  * we don't allocate from the problem extents.
7779                  * In the worst case, this will be all the
7780                  * extents in the FS
7781                  */
7782                 cache = search_cache_extent(extent_cache, 0);
7783                 while(cache) {
7784                         rec = container_of(cache, struct extent_record, cache);
7785                         set_extent_dirty(root->fs_info->excluded_extents,
7786                                          rec->start,
7787                                          rec->start + rec->max_size - 1,
7788                                          GFP_NOFS);
7789                         cache = next_cache_extent(cache);
7790                 }
7791
7792                 /* pin down all the corrupted blocks too */
7793                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7794                 while(cache) {
7795                         set_extent_dirty(root->fs_info->excluded_extents,
7796                                          cache->start,
7797                                          cache->start + cache->size - 1,
7798                                          GFP_NOFS);
7799                         cache = next_cache_extent(cache);
7800                 }
7801                 prune_corrupt_blocks(root->fs_info);
7802                 reset_cached_block_groups(root->fs_info);
7803         }
7804
7805         reset_cached_block_groups(root->fs_info);
7806
7807         /*
7808          * We need to delete any duplicate entries we find first otherwise we
7809          * could mess up the extent tree when we have backrefs that actually
7810          * belong to a different extent item and not the weird duplicate one.
7811          */
7812         while (repair && !list_empty(&duplicate_extents)) {
7813                 rec = to_extent_record(duplicate_extents.next);
7814                 list_del_init(&rec->list);
7815
7816                 /* Sometimes we can find a backref before we find an actual
7817                  * extent, so we need to process it a little bit to see if there
7818                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7819                  * if this is a backref screwup.  If we need to delete stuff
7820                  * process_duplicates() will return 0, otherwise it will return
7821                  * 1 and we
7822                  */
7823                 if (process_duplicates(root, extent_cache, rec))
7824                         continue;
7825                 ret = delete_duplicate_records(root, rec);
7826                 if (ret < 0)
7827                         return ret;
7828                 /*
7829                  * delete_duplicate_records will return the number of entries
7830                  * deleted, so if it's greater than 0 then we know we actually
7831                  * did something and we need to remove.
7832                  */
7833                 if (ret)
7834                         had_dups = 1;
7835         }
7836
7837         if (had_dups)
7838                 return -EAGAIN;
7839
7840         while(1) {
7841                 int cur_err = 0;
7842
7843                 fixed = 0;
7844                 recorded = 0;
7845                 cache = search_cache_extent(extent_cache, 0);
7846                 if (!cache)
7847                         break;
7848                 rec = container_of(cache, struct extent_record, cache);
7849                 if (rec->num_duplicates) {
7850                         fprintf(stderr, "extent item %llu has multiple extent "
7851                                 "items\n", (unsigned long long)rec->start);
7852                         err = 1;
7853                         cur_err = 1;
7854                 }
7855
7856                 if (rec->refs != rec->extent_item_refs) {
7857                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7858                                 (unsigned long long)rec->start,
7859                                 (unsigned long long)rec->nr);
7860                         fprintf(stderr, "extent item %llu, found %llu\n",
7861                                 (unsigned long long)rec->extent_item_refs,
7862                                 (unsigned long long)rec->refs);
7863                         ret = record_orphan_data_extents(root->fs_info, rec);
7864                         if (ret < 0)
7865                                 goto repair_abort;
7866                         if (ret == 0) {
7867                                 recorded = 1;
7868                         } else {
7869                                 /*
7870                                  * we can't use the extent to repair file
7871                                  * extent, let the fallback method handle it.
7872                                  */
7873                                 if (!fixed && repair) {
7874                                         ret = fixup_extent_refs(
7875                                                         root->fs_info,
7876                                                         extent_cache, rec);
7877                                         if (ret)
7878                                                 goto repair_abort;
7879                                         fixed = 1;
7880                                 }
7881                         }
7882                         err = 1;
7883                         cur_err = 1;
7884                 }
7885                 if (all_backpointers_checked(rec, 1)) {
7886                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7887                                 (unsigned long long)rec->start,
7888                                 (unsigned long long)rec->nr);
7889
7890                         if (!fixed && !recorded && repair) {
7891                                 ret = fixup_extent_refs(root->fs_info,
7892                                                         extent_cache, rec);
7893                                 if (ret)
7894                                         goto repair_abort;
7895                                 fixed = 1;
7896                         }
7897                         cur_err = 1;
7898                         err = 1;
7899                 }
7900                 if (!rec->owner_ref_checked) {
7901                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7902                                 (unsigned long long)rec->start,
7903                                 (unsigned long long)rec->nr);
7904                         if (!fixed && !recorded && repair) {
7905                                 ret = fixup_extent_refs(root->fs_info,
7906                                                         extent_cache, rec);
7907                                 if (ret)
7908                                         goto repair_abort;
7909                                 fixed = 1;
7910                         }
7911                         err = 1;
7912                         cur_err = 1;
7913                 }
7914                 if (rec->bad_full_backref) {
7915                         fprintf(stderr, "bad full backref, on [%llu]\n",
7916                                 (unsigned long long)rec->start);
7917                         if (repair) {
7918                                 ret = fixup_extent_flags(root->fs_info, rec);
7919                                 if (ret)
7920                                         goto repair_abort;
7921                                 fixed = 1;
7922                         }
7923                         err = 1;
7924                         cur_err = 1;
7925                 }
7926                 /*
7927                  * Although it's not a extent ref's problem, we reuse this
7928                  * routine for error reporting.
7929                  * No repair function yet.
7930                  */
7931                 if (rec->crossing_stripes) {
7932                         fprintf(stderr,
7933                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7934                                 rec->start, rec->start + rec->max_size);
7935                         err = 1;
7936                         cur_err = 1;
7937                 }
7938
7939                 if (rec->wrong_chunk_type) {
7940                         fprintf(stderr,
7941                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7942                                 rec->start, rec->start + rec->max_size);
7943                         err = 1;
7944                         cur_err = 1;
7945                 }
7946
7947                 remove_cache_extent(extent_cache, cache);
7948                 free_all_extent_backrefs(rec);
7949                 if (!init_extent_tree && repair && (!cur_err || fixed))
7950                         clear_extent_dirty(root->fs_info->excluded_extents,
7951                                            rec->start,
7952                                            rec->start + rec->max_size - 1,
7953                                            GFP_NOFS);
7954                 free(rec);
7955         }
7956 repair_abort:
7957         if (repair) {
7958                 if (ret && ret != -EAGAIN) {
7959                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7960                         exit(1);
7961                 } else if (!ret) {
7962                         struct btrfs_trans_handle *trans;
7963
7964                         root = root->fs_info->extent_root;
7965                         trans = btrfs_start_transaction(root, 1);
7966                         if (IS_ERR(trans)) {
7967                                 ret = PTR_ERR(trans);
7968                                 goto repair_abort;
7969                         }
7970
7971                         btrfs_fix_block_accounting(trans, root);
7972                         ret = btrfs_commit_transaction(trans, root);
7973                         if (ret)
7974                                 goto repair_abort;
7975                 }
7976                 if (err)
7977                         fprintf(stderr, "repaired damaged extent references\n");
7978                 return ret;
7979         }
7980         return err;
7981 }
7982
7983 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7984 {
7985         u64 stripe_size;
7986
7987         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7988                 stripe_size = length;
7989                 stripe_size /= num_stripes;
7990         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7991                 stripe_size = length * 2;
7992                 stripe_size /= num_stripes;
7993         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7994                 stripe_size = length;
7995                 stripe_size /= (num_stripes - 1);
7996         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7997                 stripe_size = length;
7998                 stripe_size /= (num_stripes - 2);
7999         } else {
8000                 stripe_size = length;
8001         }
8002         return stripe_size;
8003 }
8004
8005 /*
8006  * Check the chunk with its block group/dev list ref:
8007  * Return 0 if all refs seems valid.
8008  * Return 1 if part of refs seems valid, need later check for rebuild ref
8009  * like missing block group and needs to search extent tree to rebuild them.
8010  * Return -1 if essential refs are missing and unable to rebuild.
8011  */
8012 static int check_chunk_refs(struct chunk_record *chunk_rec,
8013                             struct block_group_tree *block_group_cache,
8014                             struct device_extent_tree *dev_extent_cache,
8015                             int silent)
8016 {
8017         struct cache_extent *block_group_item;
8018         struct block_group_record *block_group_rec;
8019         struct cache_extent *dev_extent_item;
8020         struct device_extent_record *dev_extent_rec;
8021         u64 devid;
8022         u64 offset;
8023         u64 length;
8024         int metadump_v2 = 0;
8025         int i;
8026         int ret = 0;
8027
8028         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8029                                                chunk_rec->offset,
8030                                                chunk_rec->length);
8031         if (block_group_item) {
8032                 block_group_rec = container_of(block_group_item,
8033                                                struct block_group_record,
8034                                                cache);
8035                 if (chunk_rec->length != block_group_rec->offset ||
8036                     chunk_rec->offset != block_group_rec->objectid ||
8037                     (!metadump_v2 &&
8038                      chunk_rec->type_flags != block_group_rec->flags)) {
8039                         if (!silent)
8040                                 fprintf(stderr,
8041                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8042                                         chunk_rec->objectid,
8043                                         chunk_rec->type,
8044                                         chunk_rec->offset,
8045                                         chunk_rec->length,
8046                                         chunk_rec->offset,
8047                                         chunk_rec->type_flags,
8048                                         block_group_rec->objectid,
8049                                         block_group_rec->type,
8050                                         block_group_rec->offset,
8051                                         block_group_rec->offset,
8052                                         block_group_rec->objectid,
8053                                         block_group_rec->flags);
8054                         ret = -1;
8055                 } else {
8056                         list_del_init(&block_group_rec->list);
8057                         chunk_rec->bg_rec = block_group_rec;
8058                 }
8059         } else {
8060                 if (!silent)
8061                         fprintf(stderr,
8062                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8063                                 chunk_rec->objectid,
8064                                 chunk_rec->type,
8065                                 chunk_rec->offset,
8066                                 chunk_rec->length,
8067                                 chunk_rec->offset,
8068                                 chunk_rec->type_flags);
8069                 ret = 1;
8070         }
8071
8072         if (metadump_v2)
8073                 return ret;
8074
8075         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8076                                     chunk_rec->num_stripes);
8077         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8078                 devid = chunk_rec->stripes[i].devid;
8079                 offset = chunk_rec->stripes[i].offset;
8080                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8081                                                        devid, offset, length);
8082                 if (dev_extent_item) {
8083                         dev_extent_rec = container_of(dev_extent_item,
8084                                                 struct device_extent_record,
8085                                                 cache);
8086                         if (dev_extent_rec->objectid != devid ||
8087                             dev_extent_rec->offset != offset ||
8088                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8089                             dev_extent_rec->length != length) {
8090                                 if (!silent)
8091                                         fprintf(stderr,
8092                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8093                                                 chunk_rec->objectid,
8094                                                 chunk_rec->type,
8095                                                 chunk_rec->offset,
8096                                                 chunk_rec->stripes[i].devid,
8097                                                 chunk_rec->stripes[i].offset,
8098                                                 dev_extent_rec->objectid,
8099                                                 dev_extent_rec->offset,
8100                                                 dev_extent_rec->length);
8101                                 ret = -1;
8102                         } else {
8103                                 list_move(&dev_extent_rec->chunk_list,
8104                                           &chunk_rec->dextents);
8105                         }
8106                 } else {
8107                         if (!silent)
8108                                 fprintf(stderr,
8109                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8110                                         chunk_rec->objectid,
8111                                         chunk_rec->type,
8112                                         chunk_rec->offset,
8113                                         chunk_rec->stripes[i].devid,
8114                                         chunk_rec->stripes[i].offset);
8115                         ret = -1;
8116                 }
8117         }
8118         return ret;
8119 }
8120
8121 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8122 int check_chunks(struct cache_tree *chunk_cache,
8123                  struct block_group_tree *block_group_cache,
8124                  struct device_extent_tree *dev_extent_cache,
8125                  struct list_head *good, struct list_head *bad,
8126                  struct list_head *rebuild, int silent)
8127 {
8128         struct cache_extent *chunk_item;
8129         struct chunk_record *chunk_rec;
8130         struct block_group_record *bg_rec;
8131         struct device_extent_record *dext_rec;
8132         int err;
8133         int ret = 0;
8134
8135         chunk_item = first_cache_extent(chunk_cache);
8136         while (chunk_item) {
8137                 chunk_rec = container_of(chunk_item, struct chunk_record,
8138                                          cache);
8139                 err = check_chunk_refs(chunk_rec, block_group_cache,
8140                                        dev_extent_cache, silent);
8141                 if (err < 0)
8142                         ret = err;
8143                 if (err == 0 && good)
8144                         list_add_tail(&chunk_rec->list, good);
8145                 if (err > 0 && rebuild)
8146                         list_add_tail(&chunk_rec->list, rebuild);
8147                 if (err < 0 && bad)
8148                         list_add_tail(&chunk_rec->list, bad);
8149                 chunk_item = next_cache_extent(chunk_item);
8150         }
8151
8152         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8153                 if (!silent)
8154                         fprintf(stderr,
8155                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8156                                 bg_rec->objectid,
8157                                 bg_rec->offset,
8158                                 bg_rec->flags);
8159                 if (!ret)
8160                         ret = 1;
8161         }
8162
8163         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8164                             chunk_list) {
8165                 if (!silent)
8166                         fprintf(stderr,
8167                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8168                                 dext_rec->objectid,
8169                                 dext_rec->offset,
8170                                 dext_rec->length);
8171                 if (!ret)
8172                         ret = 1;
8173         }
8174         return ret;
8175 }
8176
8177
8178 static int check_device_used(struct device_record *dev_rec,
8179                              struct device_extent_tree *dext_cache)
8180 {
8181         struct cache_extent *cache;
8182         struct device_extent_record *dev_extent_rec;
8183         u64 total_byte = 0;
8184
8185         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8186         while (cache) {
8187                 dev_extent_rec = container_of(cache,
8188                                               struct device_extent_record,
8189                                               cache);
8190                 if (dev_extent_rec->objectid != dev_rec->devid)
8191                         break;
8192
8193                 list_del_init(&dev_extent_rec->device_list);
8194                 total_byte += dev_extent_rec->length;
8195                 cache = next_cache_extent(cache);
8196         }
8197
8198         if (total_byte != dev_rec->byte_used) {
8199                 fprintf(stderr,
8200                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8201                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8202                         dev_rec->type, dev_rec->offset);
8203                 return -1;
8204         } else {
8205                 return 0;
8206         }
8207 }
8208
8209 /* check btrfs_dev_item -> btrfs_dev_extent */
8210 static int check_devices(struct rb_root *dev_cache,
8211                          struct device_extent_tree *dev_extent_cache)
8212 {
8213         struct rb_node *dev_node;
8214         struct device_record *dev_rec;
8215         struct device_extent_record *dext_rec;
8216         int err;
8217         int ret = 0;
8218
8219         dev_node = rb_first(dev_cache);
8220         while (dev_node) {
8221                 dev_rec = container_of(dev_node, struct device_record, node);
8222                 err = check_device_used(dev_rec, dev_extent_cache);
8223                 if (err)
8224                         ret = err;
8225
8226                 dev_node = rb_next(dev_node);
8227         }
8228         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8229                             device_list) {
8230                 fprintf(stderr,
8231                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8232                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8233                 if (!ret)
8234                         ret = 1;
8235         }
8236         return ret;
8237 }
8238
8239 static int add_root_item_to_list(struct list_head *head,
8240                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8241                                   u8 level, u8 drop_level,
8242                                   int level_size, struct btrfs_key *drop_key)
8243 {
8244
8245         struct root_item_record *ri_rec;
8246         ri_rec = malloc(sizeof(*ri_rec));
8247         if (!ri_rec)
8248                 return -ENOMEM;
8249         ri_rec->bytenr = bytenr;
8250         ri_rec->objectid = objectid;
8251         ri_rec->level = level;
8252         ri_rec->level_size = level_size;
8253         ri_rec->drop_level = drop_level;
8254         ri_rec->last_snapshot = last_snapshot;
8255         if (drop_key)
8256                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8257         list_add_tail(&ri_rec->list, head);
8258
8259         return 0;
8260 }
8261
8262 static void free_root_item_list(struct list_head *list)
8263 {
8264         struct root_item_record *ri_rec;
8265
8266         while (!list_empty(list)) {
8267                 ri_rec = list_first_entry(list, struct root_item_record,
8268                                           list);
8269                 list_del_init(&ri_rec->list);
8270                 free(ri_rec);
8271         }
8272 }
8273
8274 static int deal_root_from_list(struct list_head *list,
8275                                struct btrfs_root *root,
8276                                struct block_info *bits,
8277                                int bits_nr,
8278                                struct cache_tree *pending,
8279                                struct cache_tree *seen,
8280                                struct cache_tree *reada,
8281                                struct cache_tree *nodes,
8282                                struct cache_tree *extent_cache,
8283                                struct cache_tree *chunk_cache,
8284                                struct rb_root *dev_cache,
8285                                struct block_group_tree *block_group_cache,
8286                                struct device_extent_tree *dev_extent_cache)
8287 {
8288         int ret = 0;
8289         u64 last;
8290
8291         while (!list_empty(list)) {
8292                 struct root_item_record *rec;
8293                 struct extent_buffer *buf;
8294                 rec = list_entry(list->next,
8295                                  struct root_item_record, list);
8296                 last = 0;
8297                 buf = read_tree_block(root->fs_info->tree_root,
8298                                       rec->bytenr, rec->level_size, 0);
8299                 if (!extent_buffer_uptodate(buf)) {
8300                         free_extent_buffer(buf);
8301                         ret = -EIO;
8302                         break;
8303                 }
8304                 add_root_to_pending(buf, extent_cache, pending,
8305                                     seen, nodes, rec->objectid);
8306                 /*
8307                  * To rebuild extent tree, we need deal with snapshot
8308                  * one by one, otherwise we deal with node firstly which
8309                  * can maximize readahead.
8310                  */
8311                 while (1) {
8312                         ret = run_next_block(root, bits, bits_nr, &last,
8313                                              pending, seen, reada, nodes,
8314                                              extent_cache, chunk_cache,
8315                                              dev_cache, block_group_cache,
8316                                              dev_extent_cache, rec);
8317                         if (ret != 0)
8318                                 break;
8319                 }
8320                 free_extent_buffer(buf);
8321                 list_del(&rec->list);
8322                 free(rec);
8323                 if (ret < 0)
8324                         break;
8325         }
8326         while (ret >= 0) {
8327                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8328                                      reada, nodes, extent_cache, chunk_cache,
8329                                      dev_cache, block_group_cache,
8330                                      dev_extent_cache, NULL);
8331                 if (ret != 0) {
8332                         if (ret > 0)
8333                                 ret = 0;
8334                         break;
8335                 }
8336         }
8337         return ret;
8338 }
8339
8340 static int check_chunks_and_extents(struct btrfs_root *root)
8341 {
8342         struct rb_root dev_cache;
8343         struct cache_tree chunk_cache;
8344         struct block_group_tree block_group_cache;
8345         struct device_extent_tree dev_extent_cache;
8346         struct cache_tree extent_cache;
8347         struct cache_tree seen;
8348         struct cache_tree pending;
8349         struct cache_tree reada;
8350         struct cache_tree nodes;
8351         struct extent_io_tree excluded_extents;
8352         struct cache_tree corrupt_blocks;
8353         struct btrfs_path path;
8354         struct btrfs_key key;
8355         struct btrfs_key found_key;
8356         int ret, err = 0;
8357         struct block_info *bits;
8358         int bits_nr;
8359         struct extent_buffer *leaf;
8360         int slot;
8361         struct btrfs_root_item ri;
8362         struct list_head dropping_trees;
8363         struct list_head normal_trees;
8364         struct btrfs_root *root1;
8365         u64 objectid;
8366         u32 level_size;
8367         u8 level;
8368
8369         dev_cache = RB_ROOT;
8370         cache_tree_init(&chunk_cache);
8371         block_group_tree_init(&block_group_cache);
8372         device_extent_tree_init(&dev_extent_cache);
8373
8374         cache_tree_init(&extent_cache);
8375         cache_tree_init(&seen);
8376         cache_tree_init(&pending);
8377         cache_tree_init(&nodes);
8378         cache_tree_init(&reada);
8379         cache_tree_init(&corrupt_blocks);
8380         extent_io_tree_init(&excluded_extents);
8381         INIT_LIST_HEAD(&dropping_trees);
8382         INIT_LIST_HEAD(&normal_trees);
8383
8384         if (repair) {
8385                 root->fs_info->excluded_extents = &excluded_extents;
8386                 root->fs_info->fsck_extent_cache = &extent_cache;
8387                 root->fs_info->free_extent_hook = free_extent_hook;
8388                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8389         }
8390
8391         bits_nr = 1024;
8392         bits = malloc(bits_nr * sizeof(struct block_info));
8393         if (!bits) {
8394                 perror("malloc");
8395                 exit(1);
8396         }
8397
8398         if (ctx.progress_enabled) {
8399                 ctx.tp = TASK_EXTENTS;
8400                 task_start(ctx.info);
8401         }
8402
8403 again:
8404         root1 = root->fs_info->tree_root;
8405         level = btrfs_header_level(root1->node);
8406         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8407                                     root1->node->start, 0, level, 0,
8408                                     root1->nodesize, NULL);
8409         if (ret < 0)
8410                 goto out;
8411         root1 = root->fs_info->chunk_root;
8412         level = btrfs_header_level(root1->node);
8413         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8414                                     root1->node->start, 0, level, 0,
8415                                     root1->nodesize, NULL);
8416         if (ret < 0)
8417                 goto out;
8418         btrfs_init_path(&path);
8419         key.offset = 0;
8420         key.objectid = 0;
8421         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8422         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8423                                         &key, &path, 0, 0);
8424         if (ret < 0)
8425                 goto out;
8426         while(1) {
8427                 leaf = path.nodes[0];
8428                 slot = path.slots[0];
8429                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8430                         ret = btrfs_next_leaf(root, &path);
8431                         if (ret != 0)
8432                                 break;
8433                         leaf = path.nodes[0];
8434                         slot = path.slots[0];
8435                 }
8436                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8437                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8438                         unsigned long offset;
8439                         u64 last_snapshot;
8440
8441                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8442                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8443                         last_snapshot = btrfs_root_last_snapshot(&ri);
8444                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8445                                 level = btrfs_root_level(&ri);
8446                                 level_size = root->nodesize;
8447                                 ret = add_root_item_to_list(&normal_trees,
8448                                                 found_key.objectid,
8449                                                 btrfs_root_bytenr(&ri),
8450                                                 last_snapshot, level,
8451                                                 0, level_size, NULL);
8452                                 if (ret < 0)
8453                                         goto out;
8454                         } else {
8455                                 level = btrfs_root_level(&ri);
8456                                 level_size = root->nodesize;
8457                                 objectid = found_key.objectid;
8458                                 btrfs_disk_key_to_cpu(&found_key,
8459                                                       &ri.drop_progress);
8460                                 ret = add_root_item_to_list(&dropping_trees,
8461                                                 objectid,
8462                                                 btrfs_root_bytenr(&ri),
8463                                                 last_snapshot, level,
8464                                                 ri.drop_level,
8465                                                 level_size, &found_key);
8466                                 if (ret < 0)
8467                                         goto out;
8468                         }
8469                 }
8470                 path.slots[0]++;
8471         }
8472         btrfs_release_path(&path);
8473
8474         /*
8475          * check_block can return -EAGAIN if it fixes something, please keep
8476          * this in mind when dealing with return values from these functions, if
8477          * we get -EAGAIN we want to fall through and restart the loop.
8478          */
8479         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8480                                   &seen, &reada, &nodes, &extent_cache,
8481                                   &chunk_cache, &dev_cache, &block_group_cache,
8482                                   &dev_extent_cache);
8483         if (ret < 0) {
8484                 if (ret == -EAGAIN)
8485                         goto loop;
8486                 goto out;
8487         }
8488         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8489                                   &pending, &seen, &reada, &nodes,
8490                                   &extent_cache, &chunk_cache, &dev_cache,
8491                                   &block_group_cache, &dev_extent_cache);
8492         if (ret < 0) {
8493                 if (ret == -EAGAIN)
8494                         goto loop;
8495                 goto out;
8496         }
8497
8498         ret = check_chunks(&chunk_cache, &block_group_cache,
8499                            &dev_extent_cache, NULL, NULL, NULL, 0);
8500         if (ret) {
8501                 if (ret == -EAGAIN)
8502                         goto loop;
8503                 err = ret;
8504         }
8505
8506         ret = check_extent_refs(root, &extent_cache);
8507         if (ret < 0) {
8508                 if (ret == -EAGAIN)
8509                         goto loop;
8510                 goto out;
8511         }
8512
8513         ret = check_devices(&dev_cache, &dev_extent_cache);
8514         if (ret && err)
8515                 ret = err;
8516
8517 out:
8518         task_stop(ctx.info);
8519         if (repair) {
8520                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8521                 extent_io_tree_cleanup(&excluded_extents);
8522                 root->fs_info->fsck_extent_cache = NULL;
8523                 root->fs_info->free_extent_hook = NULL;
8524                 root->fs_info->corrupt_blocks = NULL;
8525                 root->fs_info->excluded_extents = NULL;
8526         }
8527         free(bits);
8528         free_chunk_cache_tree(&chunk_cache);
8529         free_device_cache_tree(&dev_cache);
8530         free_block_group_tree(&block_group_cache);
8531         free_device_extent_tree(&dev_extent_cache);
8532         free_extent_cache_tree(&seen);
8533         free_extent_cache_tree(&pending);
8534         free_extent_cache_tree(&reada);
8535         free_extent_cache_tree(&nodes);
8536         return ret;
8537 loop:
8538         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8539         free_extent_cache_tree(&seen);
8540         free_extent_cache_tree(&pending);
8541         free_extent_cache_tree(&reada);
8542         free_extent_cache_tree(&nodes);
8543         free_chunk_cache_tree(&chunk_cache);
8544         free_block_group_tree(&block_group_cache);
8545         free_device_cache_tree(&dev_cache);
8546         free_device_extent_tree(&dev_extent_cache);
8547         free_extent_record_cache(root->fs_info, &extent_cache);
8548         free_root_item_list(&normal_trees);
8549         free_root_item_list(&dropping_trees);
8550         extent_io_tree_cleanup(&excluded_extents);
8551         goto again;
8552 }
8553
8554 /*
8555  * Check backrefs of a tree block given by @bytenr or @eb.
8556  *
8557  * @root:       the root containing the @bytenr or @eb
8558  * @eb:         tree block extent buffer, can be NULL
8559  * @bytenr:     bytenr of the tree block to search
8560  * @level:      tree level of the tree block
8561  * @owner:      owner of the tree block
8562  *
8563  * Return >0 for any error found and output error message
8564  * Return 0 for no error found
8565  */
8566 static int check_tree_block_ref(struct btrfs_root *root,
8567                                 struct extent_buffer *eb, u64 bytenr,
8568                                 int level, u64 owner)
8569 {
8570         struct btrfs_key key;
8571         struct btrfs_root *extent_root = root->fs_info->extent_root;
8572         struct btrfs_path path;
8573         struct btrfs_extent_item *ei;
8574         struct btrfs_extent_inline_ref *iref;
8575         struct extent_buffer *leaf;
8576         unsigned long end;
8577         unsigned long ptr;
8578         int slot;
8579         int skinny_level;
8580         int type;
8581         u32 nodesize = root->nodesize;
8582         u32 item_size;
8583         u64 offset;
8584         int found_ref = 0;
8585         int err = 0;
8586         int ret;
8587
8588         btrfs_init_path(&path);
8589         key.objectid = bytenr;
8590         if (btrfs_fs_incompat(root->fs_info,
8591                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8592                 key.type = BTRFS_METADATA_ITEM_KEY;
8593         else
8594                 key.type = BTRFS_EXTENT_ITEM_KEY;
8595         key.offset = (u64)-1;
8596
8597         /* Search for the backref in extent tree */
8598         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8599         if (ret < 0) {
8600                 err |= BACKREF_MISSING;
8601                 goto out;
8602         }
8603         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8604         if (ret) {
8605                 err |= BACKREF_MISSING;
8606                 goto out;
8607         }
8608
8609         leaf = path.nodes[0];
8610         slot = path.slots[0];
8611         btrfs_item_key_to_cpu(leaf, &key, slot);
8612
8613         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8614
8615         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8616                 skinny_level = (int)key.offset;
8617                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8618         } else {
8619                 struct btrfs_tree_block_info *info;
8620
8621                 info = (struct btrfs_tree_block_info *)(ei + 1);
8622                 skinny_level = btrfs_tree_block_level(leaf, info);
8623                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8624         }
8625
8626         if (eb) {
8627                 u64 header_gen;
8628                 u64 extent_gen;
8629
8630                 if (!(btrfs_extent_flags(leaf, ei) &
8631                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8632                         error(
8633                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8634                                 key.objectid, nodesize,
8635                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8636                         err = BACKREF_MISMATCH;
8637                 }
8638                 header_gen = btrfs_header_generation(eb);
8639                 extent_gen = btrfs_extent_generation(leaf, ei);
8640                 if (header_gen != extent_gen) {
8641                         error(
8642         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8643                                 key.objectid, nodesize, header_gen,
8644                                 extent_gen);
8645                         err = BACKREF_MISMATCH;
8646                 }
8647                 if (level != skinny_level) {
8648                         error(
8649                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8650                                 key.objectid, nodesize, level, skinny_level);
8651                         err = BACKREF_MISMATCH;
8652                 }
8653                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8654                         error(
8655                         "extent[%llu %u] is referred by other roots than %llu",
8656                                 key.objectid, nodesize, root->objectid);
8657                         err = BACKREF_MISMATCH;
8658                 }
8659         }
8660
8661         /*
8662          * Iterate the extent/metadata item to find the exact backref
8663          */
8664         item_size = btrfs_item_size_nr(leaf, slot);
8665         ptr = (unsigned long)iref;
8666         end = (unsigned long)ei + item_size;
8667         while (ptr < end) {
8668                 iref = (struct btrfs_extent_inline_ref *)ptr;
8669                 type = btrfs_extent_inline_ref_type(leaf, iref);
8670                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8671
8672                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8673                         (offset == root->objectid || offset == owner)) {
8674                         found_ref = 1;
8675                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8676                         /* Check if the backref points to valid referencer */
8677                         found_ref = !check_tree_block_ref(root, NULL, offset,
8678                                                           level + 1, owner);
8679                 }
8680
8681                 if (found_ref)
8682                         break;
8683                 ptr += btrfs_extent_inline_ref_size(type);
8684         }
8685
8686         /*
8687          * Inlined extent item doesn't have what we need, check
8688          * TREE_BLOCK_REF_KEY
8689          */
8690         if (!found_ref) {
8691                 btrfs_release_path(&path);
8692                 key.objectid = bytenr;
8693                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8694                 key.offset = root->objectid;
8695
8696                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8697                 if (!ret)
8698                         found_ref = 1;
8699         }
8700         if (!found_ref)
8701                 err |= BACKREF_MISSING;
8702 out:
8703         btrfs_release_path(&path);
8704         if (eb && (err & BACKREF_MISSING))
8705                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8706                         bytenr, nodesize, owner, level);
8707         return err;
8708 }
8709
8710 /*
8711  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8712  *
8713  * Return >0 any error found and output error message
8714  * Return 0 for no error found
8715  */
8716 static int check_extent_data_item(struct btrfs_root *root,
8717                                   struct extent_buffer *eb, int slot)
8718 {
8719         struct btrfs_file_extent_item *fi;
8720         struct btrfs_path path;
8721         struct btrfs_root *extent_root = root->fs_info->extent_root;
8722         struct btrfs_key fi_key;
8723         struct btrfs_key dbref_key;
8724         struct extent_buffer *leaf;
8725         struct btrfs_extent_item *ei;
8726         struct btrfs_extent_inline_ref *iref;
8727         struct btrfs_extent_data_ref *dref;
8728         u64 owner;
8729         u64 file_extent_gen;
8730         u64 disk_bytenr;
8731         u64 disk_num_bytes;
8732         u64 extent_num_bytes;
8733         u64 extent_flags;
8734         u64 extent_gen;
8735         u32 item_size;
8736         unsigned long end;
8737         unsigned long ptr;
8738         int type;
8739         u64 ref_root;
8740         int found_dbackref = 0;
8741         int err = 0;
8742         int ret;
8743
8744         btrfs_item_key_to_cpu(eb, &fi_key, slot);
8745         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8746         file_extent_gen = btrfs_file_extent_generation(eb, fi);
8747
8748         /* Nothing to check for hole and inline data extents */
8749         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8750             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8751                 return 0;
8752
8753         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8754         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8755         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8756
8757         /* Check unaligned disk_num_bytes and num_bytes */
8758         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8759                 error(
8760 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8761                         fi_key.objectid, fi_key.offset, disk_num_bytes,
8762                         root->sectorsize);
8763                 err |= BYTES_UNALIGNED;
8764         } else {
8765                 data_bytes_allocated += disk_num_bytes;
8766         }
8767         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8768                 error(
8769 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8770                         fi_key.objectid, fi_key.offset, extent_num_bytes,
8771                         root->sectorsize);
8772                 err |= BYTES_UNALIGNED;
8773         } else {
8774                 data_bytes_referenced += extent_num_bytes;
8775         }
8776         owner = btrfs_header_owner(eb);
8777
8778         /* Check the extent item of the file extent in extent tree */
8779         btrfs_init_path(&path);
8780         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8781         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8782         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8783
8784         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8785         if (ret) {
8786                 err |= BACKREF_MISSING;
8787                 goto error;
8788         }
8789
8790         leaf = path.nodes[0];
8791         slot = path.slots[0];
8792         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8793
8794         extent_flags = btrfs_extent_flags(leaf, ei);
8795         extent_gen = btrfs_extent_generation(leaf, ei);
8796
8797         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8798                 error(
8799                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8800                     disk_bytenr, disk_num_bytes,
8801                     BTRFS_EXTENT_FLAG_DATA);
8802                 err |= BACKREF_MISMATCH;
8803         }
8804
8805         if (file_extent_gen < extent_gen) {
8806                 error(
8807 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8808                         disk_bytenr, disk_num_bytes, file_extent_gen,
8809                         extent_gen);
8810                 err |= BACKREF_MISMATCH;
8811         }
8812
8813         /* Check data backref inside that extent item */
8814         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8815         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8816         ptr = (unsigned long)iref;
8817         end = (unsigned long)ei + item_size;
8818         while (ptr < end) {
8819                 iref = (struct btrfs_extent_inline_ref *)ptr;
8820                 type = btrfs_extent_inline_ref_type(leaf, iref);
8821                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8822
8823                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8824                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
8825                         if (ref_root == owner || ref_root == root->objectid)
8826                                 found_dbackref = 1;
8827                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8828                         found_dbackref = !check_tree_block_ref(root, NULL,
8829                                 btrfs_extent_inline_ref_offset(leaf, iref),
8830                                 0, owner);
8831                 }
8832
8833                 if (found_dbackref)
8834                         break;
8835                 ptr += btrfs_extent_inline_ref_size(type);
8836         }
8837
8838         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8839         if (!found_dbackref) {
8840                 btrfs_release_path(&path);
8841
8842                 btrfs_init_path(&path);
8843                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8844                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8845                 dbref_key.offset = hash_extent_data_ref(root->objectid,
8846                                 fi_key.objectid, fi_key.offset);
8847
8848                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8849                                         &dbref_key, &path, 0, 0);
8850                 if (!ret)
8851                         found_dbackref = 1;
8852         }
8853
8854         if (!found_dbackref)
8855                 err |= BACKREF_MISSING;
8856 error:
8857         btrfs_release_path(&path);
8858         if (err & BACKREF_MISSING) {
8859                 error("data extent[%llu %llu] backref lost",
8860                       disk_bytenr, disk_num_bytes);
8861         }
8862         return err;
8863 }
8864
8865 /*
8866  * Get real tree block level for the case like shared block
8867  * Return >= 0 as tree level
8868  * Return <0 for error
8869  */
8870 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8871 {
8872         struct extent_buffer *eb;
8873         struct btrfs_path path;
8874         struct btrfs_key key;
8875         struct btrfs_extent_item *ei;
8876         u64 flags;
8877         u64 transid;
8878         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8879         u8 backref_level;
8880         u8 header_level;
8881         int ret;
8882
8883         /* Search extent tree for extent generation and level */
8884         key.objectid = bytenr;
8885         key.type = BTRFS_METADATA_ITEM_KEY;
8886         key.offset = (u64)-1;
8887
8888         btrfs_init_path(&path);
8889         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8890         if (ret < 0)
8891                 goto release_out;
8892         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8893         if (ret < 0)
8894                 goto release_out;
8895         if (ret > 0) {
8896                 ret = -ENOENT;
8897                 goto release_out;
8898         }
8899
8900         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8901         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8902                             struct btrfs_extent_item);
8903         flags = btrfs_extent_flags(path.nodes[0], ei);
8904         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8905                 ret = -ENOENT;
8906                 goto release_out;
8907         }
8908
8909         /* Get transid for later read_tree_block() check */
8910         transid = btrfs_extent_generation(path.nodes[0], ei);
8911
8912         /* Get backref level as one source */
8913         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8914                 backref_level = key.offset;
8915         } else {
8916                 struct btrfs_tree_block_info *info;
8917
8918                 info = (struct btrfs_tree_block_info *)(ei + 1);
8919                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8920         }
8921         btrfs_release_path(&path);
8922
8923         /* Get level from tree block as an alternative source */
8924         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8925         if (!extent_buffer_uptodate(eb)) {
8926                 free_extent_buffer(eb);
8927                 return -EIO;
8928         }
8929         header_level = btrfs_header_level(eb);
8930         free_extent_buffer(eb);
8931
8932         if (header_level != backref_level)
8933                 return -EIO;
8934         return header_level;
8935
8936 release_out:
8937         btrfs_release_path(&path);
8938         return ret;
8939 }
8940
8941 /*
8942  * Check if a tree block backref is valid (points to a valid tree block)
8943  * if level == -1, level will be resolved
8944  * Return >0 for any error found and print error message
8945  */
8946 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8947                                     u64 bytenr, int level)
8948 {
8949         struct btrfs_root *root;
8950         struct btrfs_key key;
8951         struct btrfs_path path;
8952         struct extent_buffer *eb;
8953         struct extent_buffer *node;
8954         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8955         int err = 0;
8956         int ret;
8957
8958         /* Query level for level == -1 special case */
8959         if (level == -1)
8960                 level = query_tree_block_level(fs_info, bytenr);
8961         if (level < 0) {
8962                 err |= REFERENCER_MISSING;
8963                 goto out;
8964         }
8965
8966         key.objectid = root_id;
8967         key.type = BTRFS_ROOT_ITEM_KEY;
8968         key.offset = (u64)-1;
8969
8970         root = btrfs_read_fs_root(fs_info, &key);
8971         if (IS_ERR(root)) {
8972                 err |= REFERENCER_MISSING;
8973                 goto out;
8974         }
8975
8976         /* Read out the tree block to get item/node key */
8977         eb = read_tree_block(root, bytenr, root->nodesize, 0);
8978         if (!extent_buffer_uptodate(eb)) {
8979                 err |= REFERENCER_MISSING;
8980                 free_extent_buffer(eb);
8981                 goto out;
8982         }
8983
8984         /* Empty tree, no need to check key */
8985         if (!btrfs_header_nritems(eb) && !level) {
8986                 free_extent_buffer(eb);
8987                 goto out;
8988         }
8989
8990         if (level)
8991                 btrfs_node_key_to_cpu(eb, &key, 0);
8992         else
8993                 btrfs_item_key_to_cpu(eb, &key, 0);
8994
8995         free_extent_buffer(eb);
8996
8997         btrfs_init_path(&path);
8998         /* Search with the first key, to ensure we can reach it */
8999         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9000         if (ret) {
9001                 err |= REFERENCER_MISSING;
9002                 goto release_out;
9003         }
9004
9005         node = path.nodes[level];
9006         if (btrfs_header_bytenr(node) != bytenr) {
9007                 error(
9008         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9009                         bytenr, nodesize, bytenr,
9010                         btrfs_header_bytenr(node));
9011                 err |= REFERENCER_MISMATCH;
9012         }
9013         if (btrfs_header_level(node) != level) {
9014                 error(
9015         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9016                         bytenr, nodesize, level,
9017                         btrfs_header_level(node));
9018                 err |= REFERENCER_MISMATCH;
9019         }
9020
9021 release_out:
9022         btrfs_release_path(&path);
9023 out:
9024         if (err & REFERENCER_MISSING) {
9025                 if (level < 0)
9026                         error("extent [%llu %d] lost referencer (owner: %llu)",
9027                                 bytenr, nodesize, root_id);
9028                 else
9029                         error(
9030                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9031                                 bytenr, nodesize, root_id, level);
9032         }
9033
9034         return err;
9035 }
9036
9037 /*
9038  * Check referencer for shared block backref
9039  * If level == -1, this function will resolve the level.
9040  */
9041 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9042                                      u64 parent, u64 bytenr, int level)
9043 {
9044         struct extent_buffer *eb;
9045         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9046         u32 nr;
9047         int found_parent = 0;
9048         int i;
9049
9050         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9051         if (!extent_buffer_uptodate(eb))
9052                 goto out;
9053
9054         if (level == -1)
9055                 level = query_tree_block_level(fs_info, bytenr);
9056         if (level < 0)
9057                 goto out;
9058
9059         if (level + 1 != btrfs_header_level(eb))
9060                 goto out;
9061
9062         nr = btrfs_header_nritems(eb);
9063         for (i = 0; i < nr; i++) {
9064                 if (bytenr == btrfs_node_blockptr(eb, i)) {
9065                         found_parent = 1;
9066                         break;
9067                 }
9068         }
9069 out:
9070         free_extent_buffer(eb);
9071         if (!found_parent) {
9072                 error(
9073         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9074                         bytenr, nodesize, parent, level);
9075                 return REFERENCER_MISSING;
9076         }
9077         return 0;
9078 }
9079
9080 /*
9081  * Check referencer for normal (inlined) data ref
9082  * If len == 0, it will be resolved by searching in extent tree
9083  */
9084 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9085                                      u64 root_id, u64 objectid, u64 offset,
9086                                      u64 bytenr, u64 len, u32 count)
9087 {
9088         struct btrfs_root *root;
9089         struct btrfs_root *extent_root = fs_info->extent_root;
9090         struct btrfs_key key;
9091         struct btrfs_path path;
9092         struct extent_buffer *leaf;
9093         struct btrfs_file_extent_item *fi;
9094         u32 found_count = 0;
9095         int slot;
9096         int ret = 0;
9097
9098         if (!len) {
9099                 key.objectid = bytenr;
9100                 key.type = BTRFS_EXTENT_ITEM_KEY;
9101                 key.offset = (u64)-1;
9102
9103                 btrfs_init_path(&path);
9104                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9105                 if (ret < 0)
9106                         goto out;
9107                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9108                 if (ret)
9109                         goto out;
9110                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9111                 if (key.objectid != bytenr ||
9112                     key.type != BTRFS_EXTENT_ITEM_KEY)
9113                         goto out;
9114                 len = key.offset;
9115                 btrfs_release_path(&path);
9116         }
9117         key.objectid = root_id;
9118         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9119         key.offset = (u64)-1;
9120         btrfs_init_path(&path);
9121
9122         root = btrfs_read_fs_root(fs_info, &key);
9123         if (IS_ERR(root))
9124                 goto out;
9125
9126         key.objectid = objectid;
9127         key.type = BTRFS_EXTENT_DATA_KEY;
9128         /*
9129          * It can be nasty as data backref offset is
9130          * file offset - file extent offset, which is smaller or
9131          * equal to original backref offset.  The only special case is
9132          * overflow.  So we need to special check and do further search.
9133          */
9134         key.offset = offset & (1ULL << 63) ? 0 : offset;
9135
9136         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9137         if (ret < 0)
9138                 goto out;
9139
9140         /*
9141          * Search afterwards to get correct one
9142          * NOTE: As we must do a comprehensive check on the data backref to
9143          * make sure the dref count also matches, we must iterate all file
9144          * extents for that inode.
9145          */
9146         while (1) {
9147                 leaf = path.nodes[0];
9148                 slot = path.slots[0];
9149
9150                 btrfs_item_key_to_cpu(leaf, &key, slot);
9151                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9152                         break;
9153                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9154                 /*
9155                  * Except normal disk bytenr and disk num bytes, we still
9156                  * need to do extra check on dbackref offset as
9157                  * dbackref offset = file_offset - file_extent_offset
9158                  */
9159                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9160                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9161                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9162                     offset)
9163                         found_count++;
9164
9165                 ret = btrfs_next_item(root, &path);
9166                 if (ret)
9167                         break;
9168         }
9169 out:
9170         btrfs_release_path(&path);
9171         if (found_count != count) {
9172                 error(
9173 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9174                         bytenr, len, root_id, objectid, offset, count, found_count);
9175                 return REFERENCER_MISSING;
9176         }
9177         return 0;
9178 }
9179
9180 /*
9181  * Check if the referencer of a shared data backref exists
9182  */
9183 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9184                                      u64 parent, u64 bytenr)
9185 {
9186         struct extent_buffer *eb;
9187         struct btrfs_key key;
9188         struct btrfs_file_extent_item *fi;
9189         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9190         u32 nr;
9191         int found_parent = 0;
9192         int i;
9193
9194         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9195         if (!extent_buffer_uptodate(eb))
9196                 goto out;
9197
9198         nr = btrfs_header_nritems(eb);
9199         for (i = 0; i < nr; i++) {
9200                 btrfs_item_key_to_cpu(eb, &key, i);
9201                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9202                         continue;
9203
9204                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9205                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9206                         continue;
9207
9208                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9209                         found_parent = 1;
9210                         break;
9211                 }
9212         }
9213
9214 out:
9215         free_extent_buffer(eb);
9216         if (!found_parent) {
9217                 error("shared extent %llu referencer lost (parent: %llu)",
9218                         bytenr, parent);
9219                 return REFERENCER_MISSING;
9220         }
9221         return 0;
9222 }
9223
9224 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
9225                            struct btrfs_root *root, int overwrite)
9226 {
9227         struct extent_buffer *c;
9228         struct extent_buffer *old = root->node;
9229         int level;
9230         int ret;
9231         struct btrfs_disk_key disk_key = {0,0,0};
9232
9233         level = 0;
9234
9235         if (overwrite) {
9236                 c = old;
9237                 extent_buffer_get(c);
9238                 goto init;
9239         }
9240         c = btrfs_alloc_free_block(trans, root,
9241                                    root->nodesize,
9242                                    root->root_key.objectid,
9243                                    &disk_key, level, 0, 0);
9244         if (IS_ERR(c)) {
9245                 c = old;
9246                 extent_buffer_get(c);
9247                 overwrite = 1;
9248         }
9249 init:
9250         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
9251         btrfs_set_header_level(c, level);
9252         btrfs_set_header_bytenr(c, c->start);
9253         btrfs_set_header_generation(c, trans->transid);
9254         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
9255         btrfs_set_header_owner(c, root->root_key.objectid);
9256
9257         write_extent_buffer(c, root->fs_info->fsid,
9258                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
9259
9260         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
9261                             btrfs_header_chunk_tree_uuid(c),
9262                             BTRFS_UUID_SIZE);
9263
9264         btrfs_mark_buffer_dirty(c);
9265         /*
9266          * this case can happen in the following case:
9267          *
9268          * 1.overwrite previous root.
9269          *
9270          * 2.reinit reloc data root, this is because we skip pin
9271          * down reloc data tree before which means we can allocate
9272          * same block bytenr here.
9273          */
9274         if (old->start == c->start) {
9275                 btrfs_set_root_generation(&root->root_item,
9276                                           trans->transid);
9277                 root->root_item.level = btrfs_header_level(root->node);
9278                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
9279                                         &root->root_key, &root->root_item);
9280                 if (ret) {
9281                         free_extent_buffer(c);
9282                         return ret;
9283                 }
9284         }
9285         free_extent_buffer(old);
9286         root->node = c;
9287         add_root_to_dirty_list(root);
9288         return 0;
9289 }
9290
9291 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
9292                                 struct extent_buffer *eb, int tree_root)
9293 {
9294         struct extent_buffer *tmp;
9295         struct btrfs_root_item *ri;
9296         struct btrfs_key key;
9297         u64 bytenr;
9298         u32 nodesize;
9299         int level = btrfs_header_level(eb);
9300         int nritems;
9301         int ret;
9302         int i;
9303
9304         /*
9305          * If we have pinned this block before, don't pin it again.
9306          * This can not only avoid forever loop with broken filesystem
9307          * but also give us some speedups.
9308          */
9309         if (test_range_bit(&fs_info->pinned_extents, eb->start,
9310                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
9311                 return 0;
9312
9313         btrfs_pin_extent(fs_info, eb->start, eb->len);
9314
9315         nodesize = btrfs_super_nodesize(fs_info->super_copy);
9316         nritems = btrfs_header_nritems(eb);
9317         for (i = 0; i < nritems; i++) {
9318                 if (level == 0) {
9319                         btrfs_item_key_to_cpu(eb, &key, i);
9320                         if (key.type != BTRFS_ROOT_ITEM_KEY)
9321                                 continue;
9322                         /* Skip the extent root and reloc roots */
9323                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
9324                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
9325                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
9326                                 continue;
9327                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
9328                         bytenr = btrfs_disk_root_bytenr(eb, ri);
9329
9330                         /*
9331                          * If at any point we start needing the real root we
9332                          * will have to build a stump root for the root we are
9333                          * in, but for now this doesn't actually use the root so
9334                          * just pass in extent_root.
9335                          */
9336                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9337                                               nodesize, 0);
9338                         if (!extent_buffer_uptodate(tmp)) {
9339                                 fprintf(stderr, "Error reading root block\n");
9340                                 return -EIO;
9341                         }
9342                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
9343                         free_extent_buffer(tmp);
9344                         if (ret)
9345                                 return ret;
9346                 } else {
9347                         bytenr = btrfs_node_blockptr(eb, i);
9348
9349                         /* If we aren't the tree root don't read the block */
9350                         if (level == 1 && !tree_root) {
9351                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
9352                                 continue;
9353                         }
9354
9355                         tmp = read_tree_block(fs_info->extent_root, bytenr,
9356                                               nodesize, 0);
9357                         if (!extent_buffer_uptodate(tmp)) {
9358                                 fprintf(stderr, "Error reading tree block\n");
9359                                 return -EIO;
9360                         }
9361                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
9362                         free_extent_buffer(tmp);
9363                         if (ret)
9364                                 return ret;
9365                 }
9366         }
9367
9368         return 0;
9369 }
9370
9371 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9372 {
9373         int ret;
9374
9375         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9376         if (ret)
9377                 return ret;
9378
9379         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
9380 }
9381
9382 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9383 {
9384         struct btrfs_block_group_cache *cache;
9385         struct btrfs_path *path;
9386         struct extent_buffer *leaf;
9387         struct btrfs_chunk *chunk;
9388         struct btrfs_key key;
9389         int ret;
9390         u64 start;
9391
9392         path = btrfs_alloc_path();
9393         if (!path)
9394                 return -ENOMEM;
9395
9396         key.objectid = 0;
9397         key.type = BTRFS_CHUNK_ITEM_KEY;
9398         key.offset = 0;
9399
9400         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9401         if (ret < 0) {
9402                 btrfs_free_path(path);
9403                 return ret;
9404         }
9405
9406         /*
9407          * We do this in case the block groups were screwed up and had alloc
9408          * bits that aren't actually set on the chunks.  This happens with
9409          * restored images every time and could happen in real life I guess.
9410          */
9411         fs_info->avail_data_alloc_bits = 0;
9412         fs_info->avail_metadata_alloc_bits = 0;
9413         fs_info->avail_system_alloc_bits = 0;
9414
9415         /* First we need to create the in-memory block groups */
9416         while (1) {
9417                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9418                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
9419                         if (ret < 0) {
9420                                 btrfs_free_path(path);
9421                                 return ret;
9422                         }
9423                         if (ret) {
9424                                 ret = 0;
9425                                 break;
9426                         }
9427                 }
9428                 leaf = path->nodes[0];
9429                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9430                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
9431                         path->slots[0]++;
9432                         continue;
9433                 }
9434
9435                 chunk = btrfs_item_ptr(leaf, path->slots[0],
9436                                        struct btrfs_chunk);
9437                 btrfs_add_block_group(fs_info, 0,
9438                                       btrfs_chunk_type(leaf, chunk),
9439                                       key.objectid, key.offset,
9440                                       btrfs_chunk_length(leaf, chunk));
9441                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
9442                                  key.offset + btrfs_chunk_length(leaf, chunk),
9443                                  GFP_NOFS);
9444                 path->slots[0]++;
9445         }
9446         start = 0;
9447         while (1) {
9448                 cache = btrfs_lookup_first_block_group(fs_info, start);
9449                 if (!cache)
9450                         break;
9451                 cache->cached = 1;
9452                 start = cache->key.objectid + cache->key.offset;
9453         }
9454
9455         btrfs_free_path(path);
9456         return 0;
9457 }
9458
9459 static int reset_balance(struct btrfs_trans_handle *trans,
9460                          struct btrfs_fs_info *fs_info)
9461 {
9462         struct btrfs_root *root = fs_info->tree_root;
9463         struct btrfs_path *path;
9464         struct extent_buffer *leaf;
9465         struct btrfs_key key;
9466         int del_slot, del_nr = 0;
9467         int ret;
9468         int found = 0;
9469
9470         path = btrfs_alloc_path();
9471         if (!path)
9472                 return -ENOMEM;
9473
9474         key.objectid = BTRFS_BALANCE_OBJECTID;
9475         key.type = BTRFS_BALANCE_ITEM_KEY;
9476         key.offset = 0;
9477
9478         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9479         if (ret) {
9480                 if (ret > 0)
9481                         ret = 0;
9482                 if (!ret)
9483                         goto reinit_data_reloc;
9484                 else
9485                         goto out;
9486         }
9487
9488         ret = btrfs_del_item(trans, root, path);
9489         if (ret)
9490                 goto out;
9491         btrfs_release_path(path);
9492
9493         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9494         key.type = BTRFS_ROOT_ITEM_KEY;
9495         key.offset = 0;
9496
9497         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9498         if (ret < 0)
9499                 goto out;
9500         while (1) {
9501                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9502                         if (!found)
9503                                 break;
9504
9505                         if (del_nr) {
9506                                 ret = btrfs_del_items(trans, root, path,
9507                                                       del_slot, del_nr);
9508                                 del_nr = 0;
9509                                 if (ret)
9510                                         goto out;
9511                         }
9512                         key.offset++;
9513                         btrfs_release_path(path);
9514
9515                         found = 0;
9516                         ret = btrfs_search_slot(trans, root, &key, path,
9517                                                 -1, 1);
9518                         if (ret < 0)
9519                                 goto out;
9520                         continue;
9521                 }
9522                 found = 1;
9523                 leaf = path->nodes[0];
9524                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9525                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9526                         break;
9527                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9528                         path->slots[0]++;
9529                         continue;
9530                 }
9531                 if (!del_nr) {
9532                         del_slot = path->slots[0];
9533                         del_nr = 1;
9534                 } else {
9535                         del_nr++;
9536                 }
9537                 path->slots[0]++;
9538         }
9539
9540         if (del_nr) {
9541                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9542                 if (ret)
9543                         goto out;
9544         }
9545         btrfs_release_path(path);
9546
9547 reinit_data_reloc:
9548         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9549         key.type = BTRFS_ROOT_ITEM_KEY;
9550         key.offset = (u64)-1;
9551         root = btrfs_read_fs_root(fs_info, &key);
9552         if (IS_ERR(root)) {
9553                 fprintf(stderr, "Error reading data reloc tree\n");
9554                 ret = PTR_ERR(root);
9555                 goto out;
9556         }
9557         record_root_in_trans(trans, root);
9558         ret = btrfs_fsck_reinit_root(trans, root, 0);
9559         if (ret)
9560                 goto out;
9561         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9562 out:
9563         btrfs_free_path(path);
9564         return ret;
9565 }
9566
9567 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9568                               struct btrfs_fs_info *fs_info)
9569 {
9570         u64 start = 0;
9571         int ret;
9572
9573         /*
9574          * The only reason we don't do this is because right now we're just
9575          * walking the trees we find and pinning down their bytes, we don't look
9576          * at any of the leaves.  In order to do mixed groups we'd have to check
9577          * the leaves of any fs roots and pin down the bytes for any file
9578          * extents we find.  Not hard but why do it if we don't have to?
9579          */
9580         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9581                 fprintf(stderr, "We don't support re-initing the extent tree "
9582                         "for mixed block groups yet, please notify a btrfs "
9583                         "developer you want to do this so they can add this "
9584                         "functionality.\n");
9585                 return -EINVAL;
9586         }
9587
9588         /*
9589          * first we need to walk all of the trees except the extent tree and pin
9590          * down the bytes that are in use so we don't overwrite any existing
9591          * metadata.
9592          */
9593         ret = pin_metadata_blocks(fs_info);
9594         if (ret) {
9595                 fprintf(stderr, "error pinning down used bytes\n");
9596                 return ret;
9597         }
9598
9599         /*
9600          * Need to drop all the block groups since we're going to recreate all
9601          * of them again.
9602          */
9603         btrfs_free_block_groups(fs_info);
9604         ret = reset_block_groups(fs_info);
9605         if (ret) {
9606                 fprintf(stderr, "error resetting the block groups\n");
9607                 return ret;
9608         }
9609
9610         /* Ok we can allocate now, reinit the extent root */
9611         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9612         if (ret) {
9613                 fprintf(stderr, "extent root initialization failed\n");
9614                 /*
9615                  * When the transaction code is updated we should end the
9616                  * transaction, but for now progs only knows about commit so
9617                  * just return an error.
9618                  */
9619                 return ret;
9620         }
9621
9622         /*
9623          * Now we have all the in-memory block groups setup so we can make
9624          * allocations properly, and the metadata we care about is safe since we
9625          * pinned all of it above.
9626          */
9627         while (1) {
9628                 struct btrfs_block_group_cache *cache;
9629
9630                 cache = btrfs_lookup_first_block_group(fs_info, start);
9631                 if (!cache)
9632                         break;
9633                 start = cache->key.objectid + cache->key.offset;
9634                 ret = btrfs_insert_item(trans, fs_info->extent_root,
9635                                         &cache->key, &cache->item,
9636                                         sizeof(cache->item));
9637                 if (ret) {
9638                         fprintf(stderr, "Error adding block group\n");
9639                         return ret;
9640                 }
9641                 btrfs_extent_post_op(trans, fs_info->extent_root);
9642         }
9643
9644         ret = reset_balance(trans, fs_info);
9645         if (ret)
9646                 fprintf(stderr, "error resetting the pending balance\n");
9647
9648         return ret;
9649 }
9650
9651 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9652 {
9653         struct btrfs_path *path;
9654         struct btrfs_trans_handle *trans;
9655         struct btrfs_key key;
9656         int ret;
9657
9658         printf("Recowing metadata block %llu\n", eb->start);
9659         key.objectid = btrfs_header_owner(eb);
9660         key.type = BTRFS_ROOT_ITEM_KEY;
9661         key.offset = (u64)-1;
9662
9663         root = btrfs_read_fs_root(root->fs_info, &key);
9664         if (IS_ERR(root)) {
9665                 fprintf(stderr, "Couldn't find owner root %llu\n",
9666                         key.objectid);
9667                 return PTR_ERR(root);
9668         }
9669
9670         path = btrfs_alloc_path();
9671         if (!path)
9672                 return -ENOMEM;
9673
9674         trans = btrfs_start_transaction(root, 1);
9675         if (IS_ERR(trans)) {
9676                 btrfs_free_path(path);
9677                 return PTR_ERR(trans);
9678         }
9679
9680         path->lowest_level = btrfs_header_level(eb);
9681         if (path->lowest_level)
9682                 btrfs_node_key_to_cpu(eb, &key, 0);
9683         else
9684                 btrfs_item_key_to_cpu(eb, &key, 0);
9685
9686         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9687         btrfs_commit_transaction(trans, root);
9688         btrfs_free_path(path);
9689         return ret;
9690 }
9691
9692 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9693 {
9694         struct btrfs_path *path;
9695         struct btrfs_trans_handle *trans;
9696         struct btrfs_key key;
9697         int ret;
9698
9699         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9700                bad->key.type, bad->key.offset);
9701         key.objectid = bad->root_id;
9702         key.type = BTRFS_ROOT_ITEM_KEY;
9703         key.offset = (u64)-1;
9704
9705         root = btrfs_read_fs_root(root->fs_info, &key);
9706         if (IS_ERR(root)) {
9707                 fprintf(stderr, "Couldn't find owner root %llu\n",
9708                         key.objectid);
9709                 return PTR_ERR(root);
9710         }
9711
9712         path = btrfs_alloc_path();
9713         if (!path)
9714                 return -ENOMEM;
9715
9716         trans = btrfs_start_transaction(root, 1);
9717         if (IS_ERR(trans)) {
9718                 btrfs_free_path(path);
9719                 return PTR_ERR(trans);
9720         }
9721
9722         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9723         if (ret) {
9724                 if (ret > 0)
9725                         ret = 0;
9726                 goto out;
9727         }
9728         ret = btrfs_del_item(trans, root, path);
9729 out:
9730         btrfs_commit_transaction(trans, root);
9731         btrfs_free_path(path);
9732         return ret;
9733 }
9734
9735 static int zero_log_tree(struct btrfs_root *root)
9736 {
9737         struct btrfs_trans_handle *trans;
9738         int ret;
9739
9740         trans = btrfs_start_transaction(root, 1);
9741         if (IS_ERR(trans)) {
9742                 ret = PTR_ERR(trans);
9743                 return ret;
9744         }
9745         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9746         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9747         ret = btrfs_commit_transaction(trans, root);
9748         return ret;
9749 }
9750
9751 static int populate_csum(struct btrfs_trans_handle *trans,
9752                          struct btrfs_root *csum_root, char *buf, u64 start,
9753                          u64 len)
9754 {
9755         u64 offset = 0;
9756         u64 sectorsize;
9757         int ret = 0;
9758
9759         while (offset < len) {
9760                 sectorsize = csum_root->sectorsize;
9761                 ret = read_extent_data(csum_root, buf, start + offset,
9762                                        &sectorsize, 0);
9763                 if (ret)
9764                         break;
9765                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9766                                             start + offset, buf, sectorsize);
9767                 if (ret)
9768                         break;
9769                 offset += sectorsize;
9770         }
9771         return ret;
9772 }
9773
9774 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9775                                       struct btrfs_root *csum_root,
9776                                       struct btrfs_root *cur_root)
9777 {
9778         struct btrfs_path *path;
9779         struct btrfs_key key;
9780         struct extent_buffer *node;
9781         struct btrfs_file_extent_item *fi;
9782         char *buf = NULL;
9783         u64 start = 0;
9784         u64 len = 0;
9785         int slot = 0;
9786         int ret = 0;
9787
9788         path = btrfs_alloc_path();
9789         if (!path)
9790                 return -ENOMEM;
9791         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9792         if (!buf) {
9793                 ret = -ENOMEM;
9794                 goto out;
9795         }
9796
9797         key.objectid = 0;
9798         key.offset = 0;
9799         key.type = 0;
9800
9801         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9802         if (ret < 0)
9803                 goto out;
9804         /* Iterate all regular file extents and fill its csum */
9805         while (1) {
9806                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9807
9808                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9809                         goto next;
9810                 node = path->nodes[0];
9811                 slot = path->slots[0];
9812                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9813                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9814                         goto next;
9815                 start = btrfs_file_extent_disk_bytenr(node, fi);
9816                 len = btrfs_file_extent_disk_num_bytes(node, fi);
9817
9818                 ret = populate_csum(trans, csum_root, buf, start, len);
9819                 if (ret == -EEXIST)
9820                         ret = 0;
9821                 if (ret < 0)
9822                         goto out;
9823 next:
9824                 /*
9825                  * TODO: if next leaf is corrupted, jump to nearest next valid
9826                  * leaf.
9827                  */
9828                 ret = btrfs_next_item(cur_root, path);
9829                 if (ret < 0)
9830                         goto out;
9831                 if (ret > 0) {
9832                         ret = 0;
9833                         goto out;
9834                 }
9835         }
9836
9837 out:
9838         btrfs_free_path(path);
9839         free(buf);
9840         return ret;
9841 }
9842
9843 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9844                                   struct btrfs_root *csum_root)
9845 {
9846         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9847         struct btrfs_path *path;
9848         struct btrfs_root *tree_root = fs_info->tree_root;
9849         struct btrfs_root *cur_root;
9850         struct extent_buffer *node;
9851         struct btrfs_key key;
9852         int slot = 0;
9853         int ret = 0;
9854
9855         path = btrfs_alloc_path();
9856         if (!path)
9857                 return -ENOMEM;
9858
9859         key.objectid = BTRFS_FS_TREE_OBJECTID;
9860         key.offset = 0;
9861         key.type = BTRFS_ROOT_ITEM_KEY;
9862
9863         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9864         if (ret < 0)
9865                 goto out;
9866         if (ret > 0) {
9867                 ret = -ENOENT;
9868                 goto out;
9869         }
9870
9871         while (1) {
9872                 node = path->nodes[0];
9873                 slot = path->slots[0];
9874                 btrfs_item_key_to_cpu(node, &key, slot);
9875                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9876                         goto out;
9877                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9878                         goto next;
9879                 if (!is_fstree(key.objectid))
9880                         goto next;
9881                 key.offset = (u64)-1;
9882
9883                 cur_root = btrfs_read_fs_root(fs_info, &key);
9884                 if (IS_ERR(cur_root) || !cur_root) {
9885                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9886                                 key.objectid);
9887                         goto out;
9888                 }
9889                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9890                                 cur_root);
9891                 if (ret < 0)
9892                         goto out;
9893 next:
9894                 ret = btrfs_next_item(tree_root, path);
9895                 if (ret > 0) {
9896                         ret = 0;
9897                         goto out;
9898                 }
9899                 if (ret < 0)
9900                         goto out;
9901         }
9902
9903 out:
9904         btrfs_free_path(path);
9905         return ret;
9906 }
9907
9908 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9909                                       struct btrfs_root *csum_root)
9910 {
9911         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9912         struct btrfs_path *path;
9913         struct btrfs_extent_item *ei;
9914         struct extent_buffer *leaf;
9915         char *buf;
9916         struct btrfs_key key;
9917         int ret;
9918
9919         path = btrfs_alloc_path();
9920         if (!path)
9921                 return -ENOMEM;
9922
9923         key.objectid = 0;
9924         key.type = BTRFS_EXTENT_ITEM_KEY;
9925         key.offset = 0;
9926
9927         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9928         if (ret < 0) {
9929                 btrfs_free_path(path);
9930                 return ret;
9931         }
9932
9933         buf = malloc(csum_root->sectorsize);
9934         if (!buf) {
9935                 btrfs_free_path(path);
9936                 return -ENOMEM;
9937         }
9938
9939         while (1) {
9940                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9941                         ret = btrfs_next_leaf(extent_root, path);
9942                         if (ret < 0)
9943                                 break;
9944                         if (ret) {
9945                                 ret = 0;
9946                                 break;
9947                         }
9948                 }
9949                 leaf = path->nodes[0];
9950
9951                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9952                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9953                         path->slots[0]++;
9954                         continue;
9955                 }
9956
9957                 ei = btrfs_item_ptr(leaf, path->slots[0],
9958                                     struct btrfs_extent_item);
9959                 if (!(btrfs_extent_flags(leaf, ei) &
9960                       BTRFS_EXTENT_FLAG_DATA)) {
9961                         path->slots[0]++;
9962                         continue;
9963                 }
9964
9965                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9966                                     key.offset);
9967                 if (ret)
9968                         break;
9969                 path->slots[0]++;
9970         }
9971
9972         btrfs_free_path(path);
9973         free(buf);
9974         return ret;
9975 }
9976
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * When the extent tree has just been re-initialized it no longer holds any
 * extent info, so it cannot be used as the source.  In that case the caller
 * sets @search_fs_tree and the fs/subvolume trees are scanned instead;
 * otherwise the extent tree is scanned.
 *
 * Returns the result of whichever scan was selected.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
9993
9994 static void free_roots_info_cache(void)
9995 {
9996         if (!roots_info_cache)
9997                 return;
9998
9999         while (!cache_tree_empty(roots_info_cache)) {
10000                 struct cache_extent *entry;
10001                 struct root_item_info *rii;
10002
10003                 entry = first_cache_extent(roots_info_cache);
10004                 if (!entry)
10005                         break;
10006                 remove_cache_extent(roots_info_cache, entry);
10007                 rii = container_of(entry, struct root_item_info, cache_extent);
10008                 free(rii);
10009         }
10010
10011         free(roots_info_cache);
10012         roots_info_cache = NULL;
10013 }
10014
10015 static int build_roots_info_cache(struct btrfs_fs_info *info)
10016 {
10017         int ret = 0;
10018         struct btrfs_key key;
10019         struct extent_buffer *leaf;
10020         struct btrfs_path *path;
10021
10022         if (!roots_info_cache) {
10023                 roots_info_cache = malloc(sizeof(*roots_info_cache));
10024                 if (!roots_info_cache)
10025                         return -ENOMEM;
10026                 cache_tree_init(roots_info_cache);
10027         }
10028
10029         path = btrfs_alloc_path();
10030         if (!path)
10031                 return -ENOMEM;
10032
10033         key.objectid = 0;
10034         key.type = BTRFS_EXTENT_ITEM_KEY;
10035         key.offset = 0;
10036
10037         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10038         if (ret < 0)
10039                 goto out;
10040         leaf = path->nodes[0];
10041
10042         while (1) {
10043                 struct btrfs_key found_key;
10044                 struct btrfs_extent_item *ei;
10045                 struct btrfs_extent_inline_ref *iref;
10046                 int slot = path->slots[0];
10047                 int type;
10048                 u64 flags;
10049                 u64 root_id;
10050                 u8 level;
10051                 struct cache_extent *entry;
10052                 struct root_item_info *rii;
10053
10054                 if (slot >= btrfs_header_nritems(leaf)) {
10055                         ret = btrfs_next_leaf(info->extent_root, path);
10056                         if (ret < 0) {
10057                                 break;
10058                         } else if (ret) {
10059                                 ret = 0;
10060                                 break;
10061                         }
10062                         leaf = path->nodes[0];
10063                         slot = path->slots[0];
10064                 }
10065
10066                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10067
10068                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10069                     found_key.type != BTRFS_METADATA_ITEM_KEY)
10070                         goto next;
10071
10072                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10073                 flags = btrfs_extent_flags(leaf, ei);
10074
10075                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10076                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10077                         goto next;
10078
10079                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10080                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10081                         level = found_key.offset;
10082                 } else {
10083                         struct btrfs_tree_block_info *binfo;
10084
10085                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
10086                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10087                         level = btrfs_tree_block_level(leaf, binfo);
10088                 }
10089
10090                 /*
10091                  * For a root extent, it must be of the following type and the
10092                  * first (and only one) iref in the item.
10093                  */
10094                 type = btrfs_extent_inline_ref_type(leaf, iref);
10095                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10096                         goto next;
10097
10098                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10099                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10100                 if (!entry) {
10101                         rii = malloc(sizeof(struct root_item_info));
10102                         if (!rii) {
10103                                 ret = -ENOMEM;
10104                                 goto out;
10105                         }
10106                         rii->cache_extent.start = root_id;
10107                         rii->cache_extent.size = 1;
10108                         rii->level = (u8)-1;
10109                         entry = &rii->cache_extent;
10110                         ret = insert_cache_extent(roots_info_cache, entry);
10111                         ASSERT(ret == 0);
10112                 } else {
10113                         rii = container_of(entry, struct root_item_info,
10114                                            cache_extent);
10115                 }
10116
10117                 ASSERT(rii->cache_extent.start == root_id);
10118                 ASSERT(rii->cache_extent.size == 1);
10119
10120                 if (level > rii->level || rii->level == (u8)-1) {
10121                         rii->level = level;
10122                         rii->bytenr = found_key.objectid;
10123                         rii->gen = btrfs_extent_generation(leaf, ei);
10124                         rii->node_count = 1;
10125                 } else if (level == rii->level) {
10126                         rii->node_count++;
10127                 }
10128 next:
10129                 path->slots[0]++;
10130         }
10131
10132 out:
10133         btrfs_free_path(path);
10134
10135         return ret;
10136 }
10137
10138 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10139                                   struct btrfs_path *path,
10140                                   const struct btrfs_key *root_key,
10141                                   const int read_only_mode)
10142 {
10143         const u64 root_id = root_key->objectid;
10144         struct cache_extent *entry;
10145         struct root_item_info *rii;
10146         struct btrfs_root_item ri;
10147         unsigned long offset;
10148
10149         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10150         if (!entry) {
10151                 fprintf(stderr,
10152                         "Error: could not find extent items for root %llu\n",
10153                         root_key->objectid);
10154                 return -ENOENT;
10155         }
10156
10157         rii = container_of(entry, struct root_item_info, cache_extent);
10158         ASSERT(rii->cache_extent.start == root_id);
10159         ASSERT(rii->cache_extent.size == 1);
10160
10161         if (rii->node_count != 1) {
10162                 fprintf(stderr,
10163                         "Error: could not find btree root extent for root %llu\n",
10164                         root_id);
10165                 return -ENOENT;
10166         }
10167
10168         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10169         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10170
10171         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10172             btrfs_root_level(&ri) != rii->level ||
10173             btrfs_root_generation(&ri) != rii->gen) {
10174
10175                 /*
10176                  * If we're in repair mode but our caller told us to not update
10177                  * the root item, i.e. just check if it needs to be updated, don't
10178                  * print this message, since the caller will call us again shortly
10179                  * for the same root item without read only mode (the caller will
10180                  * open a transaction first).
10181                  */
10182                 if (!(read_only_mode && repair))
10183                         fprintf(stderr,
10184                                 "%sroot item for root %llu,"
10185                                 " current bytenr %llu, current gen %llu, current level %u,"
10186                                 " new bytenr %llu, new gen %llu, new level %u\n",
10187                                 (read_only_mode ? "" : "fixing "),
10188                                 root_id,
10189                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10190                                 btrfs_root_level(&ri),
10191                                 rii->bytenr, rii->gen, rii->level);
10192
10193                 if (btrfs_root_generation(&ri) > rii->gen) {
10194                         fprintf(stderr,
10195                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10196                                 root_id, btrfs_root_generation(&ri), rii->gen);
10197                         return -EINVAL;
10198                 }
10199
10200                 if (!read_only_mode) {
10201                         btrfs_set_root_bytenr(&ri, rii->bytenr);
10202                         btrfs_set_root_level(&ri, rii->level);
10203                         btrfs_set_root_generation(&ri, rii->gen);
10204                         write_extent_buffer(path->nodes[0], &ri,
10205                                             offset, sizeof(ri));
10206                 }
10207
10208                 return 1;
10209         }
10210
10211         return 0;
10212 }
10213
10214 /*
10215  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
10216  * caused read-only snapshots to be corrupted if they were created at a moment
10217  * when the source subvolume/snapshot had orphan items. The issue was that the
10218  * on-disk root items became incorrect, referring to the pre orphan cleanup root
10219  * node instead of the post orphan cleanup root node.
10220  * So this function, and its callees, just detects and fixes those cases. Even
10221  * though the regression was for read-only snapshots, this function applies to
10222  * any snapshot/subvolume root.
10223  * This must be run before any other repair code - not doing it so, makes other
10224  * repair code delete or modify backrefs in the extent tree for example, which
10225  * will result in an inconsistent fs after repairing the root items.
10226  */
10227 static int repair_root_items(struct btrfs_fs_info *info)
10228 {
10229         struct btrfs_path *path = NULL;
10230         struct btrfs_key key;
10231         struct extent_buffer *leaf;
10232         struct btrfs_trans_handle *trans = NULL;
10233         int ret = 0;
10234         int bad_roots = 0;
10235         int need_trans = 0;
10236
10237         ret = build_roots_info_cache(info);
10238         if (ret)
10239                 goto out;
10240
10241         path = btrfs_alloc_path();
10242         if (!path) {
10243                 ret = -ENOMEM;
10244                 goto out;
10245         }
10246
10247         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
10248         key.type = BTRFS_ROOT_ITEM_KEY;
10249         key.offset = 0;
10250
10251 again:
10252         /*
10253          * Avoid opening and committing transactions if a leaf doesn't have
10254          * any root items that need to be fixed, so that we avoid rotating
10255          * backup roots unnecessarily.
10256          */
10257         if (need_trans) {
10258                 trans = btrfs_start_transaction(info->tree_root, 1);
10259                 if (IS_ERR(trans)) {
10260                         ret = PTR_ERR(trans);
10261                         goto out;
10262                 }
10263         }
10264
10265         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
10266                                 0, trans ? 1 : 0);
10267         if (ret < 0)
10268                 goto out;
10269         leaf = path->nodes[0];
10270
10271         while (1) {
10272                 struct btrfs_key found_key;
10273
10274                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
10275                         int no_more_keys = find_next_key(path, &key);
10276
10277                         btrfs_release_path(path);
10278                         if (trans) {
10279                                 ret = btrfs_commit_transaction(trans,
10280                                                                info->tree_root);
10281                                 trans = NULL;
10282                                 if (ret < 0)
10283                                         goto out;
10284                         }
10285                         need_trans = 0;
10286                         if (no_more_keys)
10287                                 break;
10288                         goto again;
10289                 }
10290
10291                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10292
10293                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
10294                         goto next;
10295                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
10296                         goto next;
10297
10298                 ret = maybe_repair_root_item(info, path, &found_key,
10299                                              trans ? 0 : 1);
10300                 if (ret < 0)
10301                         goto out;
10302                 if (ret) {
10303                         if (!trans && repair) {
10304                                 need_trans = 1;
10305                                 key = found_key;
10306                                 btrfs_release_path(path);
10307                                 goto again;
10308                         }
10309                         bad_roots++;
10310                 }
10311 next:
10312                 path->slots[0]++;
10313         }
10314         ret = 0;
10315 out:
10316         free_roots_info_cache();
10317         btrfs_free_path(path);
10318         if (trans)
10319                 btrfs_commit_transaction(trans, info->tree_root);
10320         if (ret < 0)
10321                 return ret;
10322
10323         return bad_roots;
10324 }
10325
/*
 * Help text for "btrfs check"; cmd_check() passes it to usage().
 * NULL-terminated array of strings: the command synopsis, a short summary,
 * a longer description, and then one entry per line of option help.
 */
10326 const char * const cmd_check_usage[] = {
10327         "btrfs check [options] <device>",
10328         "Check structural integrity of a filesystem (unmounted).",
10329         "Check structural integrity of an unmounted filesystem. Verify internal",
10330         "trees' consistency and item connectivity. In the repair mode try to",
10331         "fix the problems found.",
10332         "WARNING: the repair mode is considered dangerous",
10333         "",
10334         "-s|--super <superblock>     use this superblock copy",
10335         "-b|--backup                 use the first valid backup root copy",
10336         "--repair                    try to repair the filesystem",
10337         "--readonly                  run in read-only mode (default)",
10338         "--init-csum-tree            create a new CRC tree",
10339         "--init-extent-tree          create a new extent tree",
10340         "--check-data-csum           verify checksums of data blocks",
10341         "-Q|--qgroup-report           print a report on qgroup consistency",
10342         "-E|--subvol-extents <subvolid>",
10343         "                            print subvolume extents and sharing state",
10344         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
10345         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
10346         "-p|--progress               indicate progress",
10347         NULL
10348 };
10349
10350 int cmd_check(int argc, char **argv)
10351 {
10352         struct cache_tree root_cache;
10353         struct btrfs_root *root;
10354         struct btrfs_fs_info *info;
10355         u64 bytenr = 0;
10356         u64 subvolid = 0;
10357         u64 tree_root_bytenr = 0;
10358         u64 chunk_root_bytenr = 0;
10359         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10360         int ret;
10361         u64 num;
10362         int init_csum_tree = 0;
10363         int readonly = 0;
10364         int qgroup_report = 0;
10365         int qgroups_repaired = 0;
10366         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10367
10368         while(1) {
10369                 int c;
10370                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10371                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10372                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10373                 static const struct option long_options[] = {
10374                         { "super", required_argument, NULL, 's' },
10375                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10376                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10377                         { "init-csum-tree", no_argument, NULL,
10378                                 GETOPT_VAL_INIT_CSUM },
10379                         { "init-extent-tree", no_argument, NULL,
10380                                 GETOPT_VAL_INIT_EXTENT },
10381                         { "check-data-csum", no_argument, NULL,
10382                                 GETOPT_VAL_CHECK_CSUM },
10383                         { "backup", no_argument, NULL, 'b' },
10384                         { "subvol-extents", required_argument, NULL, 'E' },
10385                         { "qgroup-report", no_argument, NULL, 'Q' },
10386                         { "tree-root", required_argument, NULL, 'r' },
10387                         { "chunk-root", required_argument, NULL,
10388                                 GETOPT_VAL_CHUNK_TREE },
10389                         { "progress", no_argument, NULL, 'p' },
10390                         { NULL, 0, NULL, 0}
10391                 };
10392
10393                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10394                 if (c < 0)
10395                         break;
10396                 switch(c) {
10397                         case 'a': /* ignored */ break;
10398                         case 'b':
10399                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10400                                 break;
10401                         case 's':
10402                                 num = arg_strtou64(optarg);
10403                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10404                                         fprintf(stderr,
10405                                                 "ERROR: super mirror should be less than: %d\n",
10406                                                 BTRFS_SUPER_MIRROR_MAX);
10407                                         exit(1);
10408                                 }
10409                                 bytenr = btrfs_sb_offset(((int)num));
10410                                 printf("using SB copy %llu, bytenr %llu\n", num,
10411                                        (unsigned long long)bytenr);
10412                                 break;
10413                         case 'Q':
10414                                 qgroup_report = 1;
10415                                 break;
10416                         case 'E':
10417                                 subvolid = arg_strtou64(optarg);
10418                                 break;
10419                         case 'r':
10420                                 tree_root_bytenr = arg_strtou64(optarg);
10421                                 break;
10422                         case GETOPT_VAL_CHUNK_TREE:
10423                                 chunk_root_bytenr = arg_strtou64(optarg);
10424                                 break;
10425                         case 'p':
10426                                 ctx.progress_enabled = true;
10427                                 break;
10428                         case '?':
10429                         case 'h':
10430                                 usage(cmd_check_usage);
10431                         case GETOPT_VAL_REPAIR:
10432                                 printf("enabling repair mode\n");
10433                                 repair = 1;
10434                                 ctree_flags |= OPEN_CTREE_WRITES;
10435                                 break;
10436                         case GETOPT_VAL_READONLY:
10437                                 readonly = 1;
10438                                 break;
10439                         case GETOPT_VAL_INIT_CSUM:
10440                                 printf("Creating a new CRC tree\n");
10441                                 init_csum_tree = 1;
10442                                 repair = 1;
10443                                 ctree_flags |= OPEN_CTREE_WRITES;
10444                                 break;
10445                         case GETOPT_VAL_INIT_EXTENT:
10446                                 init_extent_tree = 1;
10447                                 ctree_flags |= (OPEN_CTREE_WRITES |
10448                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
10449                                 repair = 1;
10450                                 break;
10451                         case GETOPT_VAL_CHECK_CSUM:
10452                                 check_data_csum = 1;
10453                                 break;
10454                 }
10455         }
10456
10457         if (check_argc_exact(argc - optind, 1))
10458                 usage(cmd_check_usage);
10459
10460         if (ctx.progress_enabled) {
10461                 ctx.tp = TASK_NOTHING;
10462                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
10463         }
10464
10465         /* This check is the only reason for --readonly to exist */
10466         if (readonly && repair) {
10467                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
10468                 exit(1);
10469         }
10470
10471         radix_tree_init();
10472         cache_tree_init(&root_cache);
10473
10474         if((ret = check_mounted(argv[optind])) < 0) {
10475                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
10476                 goto err_out;
10477         } else if(ret) {
10478                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
10479                 ret = -EBUSY;
10480                 goto err_out;
10481         }
10482
10483         /* only allow partial opening under repair mode */
10484         if (repair)
10485                 ctree_flags |= OPEN_CTREE_PARTIAL;
10486
10487         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
10488                                   chunk_root_bytenr, ctree_flags);
10489         if (!info) {
10490                 fprintf(stderr, "Couldn't open file system\n");
10491                 ret = -EIO;
10492                 goto err_out;
10493         }
10494
10495         global_info = info;
10496         root = info->fs_root;
10497
10498         /*
10499          * repair mode will force us to commit transaction which
10500          * will make us fail to load log tree when mounting.
10501          */
10502         if (repair && btrfs_super_log_root(info->super_copy)) {
10503                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
10504                 if (!ret) {
10505                         ret = 1;
10506                         goto close_out;
10507                 }
10508                 ret = zero_log_tree(root);
10509                 if (ret) {
10510                         fprintf(stderr, "fail to zero log tree\n");
10511                         goto close_out;
10512                 }
10513         }
10514
10515         uuid_unparse(info->super_copy->fsid, uuidbuf);
10516         if (qgroup_report) {
10517                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10518                        uuidbuf);
10519                 ret = qgroup_verify_all(info);
10520                 if (ret == 0)
10521                         report_qgroups(1);
10522                 goto close_out;
10523         }
10524         if (subvolid) {
10525                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10526                        subvolid, argv[optind], uuidbuf);
10527                 ret = print_extent_state(info, subvolid);
10528                 goto close_out;
10529         }
10530         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10531
10532         if (!extent_buffer_uptodate(info->tree_root->node) ||
10533             !extent_buffer_uptodate(info->dev_root->node) ||
10534             !extent_buffer_uptodate(info->chunk_root->node)) {
10535                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10536                 ret = -EIO;
10537                 goto close_out;
10538         }
10539
10540         if (init_extent_tree || init_csum_tree) {
10541                 struct btrfs_trans_handle *trans;
10542
10543                 trans = btrfs_start_transaction(info->extent_root, 0);
10544                 if (IS_ERR(trans)) {
10545                         fprintf(stderr, "Error starting transaction\n");
10546                         ret = PTR_ERR(trans);
10547                         goto close_out;
10548                 }
10549
10550                 if (init_extent_tree) {
10551                         printf("Creating a new extent tree\n");
10552                         ret = reinit_extent_tree(trans, info);
10553                         if (ret)
10554                                 goto close_out;
10555                 }
10556
10557                 if (init_csum_tree) {
10558                         fprintf(stderr, "Reinit crc root\n");
10559                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10560                         if (ret) {
10561                                 fprintf(stderr, "crc root initialization failed\n");
10562                                 ret = -EIO;
10563                                 goto close_out;
10564                         }
10565
10566                         ret = fill_csum_tree(trans, info->csum_root,
10567                                              init_extent_tree);
10568                         if (ret) {
10569                                 fprintf(stderr, "crc refilling failed\n");
10570                                 return -EIO;
10571                         }
10572                 }
10573                 /*
10574                  * Ok now we commit and run the normal fsck, which will add
10575                  * extent entries for all of the items it finds.
10576                  */
10577                 ret = btrfs_commit_transaction(trans, info->extent_root);
10578                 if (ret)
10579                         goto close_out;
10580         }
10581         if (!extent_buffer_uptodate(info->extent_root->node)) {
10582                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10583                 ret = -EIO;
10584                 goto close_out;
10585         }
10586         if (!extent_buffer_uptodate(info->csum_root->node)) {
10587                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10588                 ret = -EIO;
10589                 goto close_out;
10590         }
10591
10592         if (!ctx.progress_enabled)
10593                 fprintf(stderr, "checking extents\n");
10594         ret = check_chunks_and_extents(root);
10595         if (ret)
10596                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10597
10598         ret = repair_root_items(info);
10599         if (ret < 0)
10600                 goto close_out;
10601         if (repair) {
10602                 fprintf(stderr, "Fixed %d roots.\n", ret);
10603                 ret = 0;
10604         } else if (ret > 0) {
10605                 fprintf(stderr,
10606                        "Found %d roots with an outdated root item.\n",
10607                        ret);
10608                 fprintf(stderr,
10609                         "Please run a filesystem check with the option --repair to fix them.\n");
10610                 ret = 1;
10611                 goto close_out;
10612         }
10613
10614         if (!ctx.progress_enabled) {
10615                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10616                         fprintf(stderr, "checking free space tree\n");
10617                 else
10618                         fprintf(stderr, "checking free space cache\n");
10619         }
10620         ret = check_space_cache(root);
10621         if (ret)
10622                 goto out;
10623
10624         /*
10625          * We used to have to have these hole extents in between our real
10626          * extents so if we don't have this flag set we need to make sure there
10627          * are no gaps in the file extents for inodes, otherwise we can just
10628          * ignore it when this happens.
10629          */
10630         no_holes = btrfs_fs_incompat(root->fs_info,
10631                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
10632         if (!ctx.progress_enabled)
10633                 fprintf(stderr, "checking fs roots\n");
10634         ret = check_fs_roots(root, &root_cache);
10635         if (ret)
10636                 goto out;
10637
10638         fprintf(stderr, "checking csums\n");
10639         ret = check_csums(root);
10640         if (ret)
10641                 goto out;
10642
10643         fprintf(stderr, "checking root refs\n");
10644         ret = check_root_refs(root, &root_cache);
10645         if (ret)
10646                 goto out;
10647
10648         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10649                 struct extent_buffer *eb;
10650
10651                 eb = list_first_entry(&root->fs_info->recow_ebs,
10652                                       struct extent_buffer, recow);
10653                 list_del_init(&eb->recow);
10654                 ret = recow_extent_buffer(root, eb);
10655                 if (ret)
10656                         break;
10657         }
10658
10659         while (!list_empty(&delete_items)) {
10660                 struct bad_item *bad;
10661
10662                 bad = list_first_entry(&delete_items, struct bad_item, list);
10663                 list_del_init(&bad->list);
10664                 if (repair)
10665                         ret = delete_bad_item(root, bad);
10666                 free(bad);
10667         }
10668
10669         if (info->quota_enabled) {
10670                 int err;
10671                 fprintf(stderr, "checking quota groups\n");
10672                 err = qgroup_verify_all(info);
10673                 if (err)
10674                         goto out;
10675                 report_qgroups(0);
10676                 err = repair_qgroups(info, &qgroups_repaired);
10677                 if (err)
10678                         goto out;
10679         }
10680
10681         if (!list_empty(&root->fs_info->recow_ebs)) {
10682                 fprintf(stderr, "Transid errors in file system\n");
10683                 ret = 1;
10684         }
10685 out:
10686         /* Don't override original ret */
10687         if (!ret && qgroups_repaired)
10688                 ret = qgroups_repaired;
10689
10690         if (found_old_backref) { /*
10691                  * there was a disk format change when mixed
10692                  * backref was in testing tree. The old format
10693                  * existed about one week.
10694                  */
10695                 printf("\n * Found old mixed backref format. "
10696                        "The old format is not supported! *"
10697                        "\n * Please mount the FS in readonly mode, "
10698                        "backup data and re-format the FS. *\n\n");
10699                 ret = 1;
10700         }
10701         printf("found %llu bytes used err is %d\n",
10702                (unsigned long long)bytes_used, ret);
10703         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10704         printf("total tree bytes: %llu\n",
10705                (unsigned long long)total_btree_bytes);
10706         printf("total fs tree bytes: %llu\n",
10707                (unsigned long long)total_fs_tree_bytes);
10708         printf("total extent tree bytes: %llu\n",
10709                (unsigned long long)total_extent_tree_bytes);
10710         printf("btree space waste bytes: %llu\n",
10711                (unsigned long long)btree_space_waste);
10712         printf("file data blocks allocated: %llu\n referenced %llu\n",
10713                 (unsigned long long)data_bytes_allocated,
10714                 (unsigned long long)data_bytes_referenced);
10715
10716         free_qgroup_counts();
10717         free_root_recs_tree(&root_cache);
10718 close_out:
10719         close_ctree(root);
10720 err_out:
10721         if (ctx.progress_enabled)
10722                 task_deinit(ctx.info);
10723
10724         return ret;
10725 }