btrfs-progs: check: supplement extent backref list with rbtree
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int repair = 0;
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
78 struct extent_backref {
79         struct list_head list;
80         struct rb_node node;
81         unsigned int is_data:1;
82         unsigned int found_extent_tree:1;
83         unsigned int full_backref:1;
84         unsigned int found_ref:1;
85         unsigned int broken:1;
86 };
87
88 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
89 {
90         return list_entry(entry, struct extent_backref, list);
91 }
92
93 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
94 {
95         return rb_entry(node, struct extent_backref, node);
96 }
97
98 struct data_backref {
99         struct extent_backref node;
100         union {
101                 u64 parent;
102                 u64 root;
103         };
104         u64 owner;
105         u64 offset;
106         u64 disk_bytenr;
107         u64 bytes;
108         u64 ram_bytes;
109         u32 num_refs;
110         u32 found_ref;
111 };
112
113 static inline struct data_backref* to_data_backref(struct extent_backref *back)
114 {
115         return container_of(back, struct data_backref, node);
116 }
117
118 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
119 {
120         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
121         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
122         struct data_backref *back1 = to_data_backref(ext1);
123         struct data_backref *back2 = to_data_backref(ext2);
124
125         WARN_ON(!ext1->is_data);
126         WARN_ON(!ext2->is_data);
127
128         /* parent and root are a union, so this covers both */
129         if (back1->parent > back2->parent)
130                 return 1;
131         if (back1->parent < back2->parent)
132                 return -1;
133
134         /* This is a full backref and the parents match. */
135         if (back1->node.full_backref)
136                 return 0;
137
138         if (back1->owner > back2->owner)
139                 return 1;
140         if (back1->owner < back2->owner)
141                 return -1;
142
143         if (back1->offset > back2->offset)
144                 return 1;
145         if (back1->offset < back2->offset)
146                 return -1;
147
148         if (back1->bytes > back2->bytes)
149                 return 1;
150         if (back1->bytes < back2->bytes)
151                 return -1;
152
153         if (back1->found_ref && back2->found_ref) {
154                 if (back1->disk_bytenr > back2->disk_bytenr)
155                         return 1;
156                 if (back1->disk_bytenr < back2->disk_bytenr)
157                         return -1;
158
159                 if (back1->found_ref > back2->found_ref)
160                         return 1;
161                 if (back1->found_ref < back2->found_ref)
162                         return -1;
163         }
164
165         return 0;
166 }
167
168 /*
169  * Much like data_backref, just removed the undetermined members
170  * and change it to use list_head.
171  * During extent scan, it is stored in root->orphan_data_extent.
172  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
173  */
174 struct orphan_data_extent {
175         struct list_head list;
176         u64 root;
177         u64 objectid;
178         u64 offset;
179         u64 disk_bytenr;
180         u64 disk_len;
181 };
182
183 struct tree_backref {
184         struct extent_backref node;
185         union {
186                 u64 parent;
187                 u64 root;
188         };
189 };
190
191 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
192 {
193         return container_of(back, struct tree_backref, node);
194 }
195
196 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
197 {
198         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
199         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
200         struct tree_backref *back1 = to_tree_backref(ext1);
201         struct tree_backref *back2 = to_tree_backref(ext2);
202
203         WARN_ON(ext1->is_data);
204         WARN_ON(ext2->is_data);
205
206         /* parent and root are a union, so this covers both */
207         if (back1->parent > back2->parent)
208                 return 1;
209         if (back1->parent < back2->parent)
210                 return -1;
211
212         return 0;
213 }
214
215 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
216 {
217         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219
220         if (ext1->is_data > ext2->is_data)
221                 return 1;
222
223         if (ext1->is_data < ext2->is_data)
224                 return -1;
225
226         if (ext1->full_backref > ext2->full_backref)
227                 return 1;
228         if (ext1->full_backref < ext2->full_backref)
229                 return -1;
230
231         if (ext1->is_data)
232                 return compare_data_backref(node1, node2);
233         else
234                 return compare_tree_backref(node1, node2);
235 }
236
237 /* Explicit initialization for extent_record::flag_block_full_backref */
238 enum { FLAG_UNSET = 2 };
239
240 struct extent_record {
241         struct list_head backrefs;
242         struct list_head dups;
243         struct rb_root backref_tree;
244         struct list_head list;
245         struct cache_extent cache;
246         struct btrfs_disk_key parent_key;
247         u64 start;
248         u64 max_size;
249         u64 nr;
250         u64 refs;
251         u64 extent_item_refs;
252         u64 generation;
253         u64 parent_generation;
254         u64 info_objectid;
255         u32 num_duplicates;
256         u8 info_level;
257         unsigned int flag_block_full_backref:2;
258         unsigned int found_rec:1;
259         unsigned int content_checked:1;
260         unsigned int owner_ref_checked:1;
261         unsigned int is_root:1;
262         unsigned int metadata:1;
263         unsigned int bad_full_backref:1;
264         unsigned int crossing_stripes:1;
265         unsigned int wrong_chunk_type:1;
266 };
267
268 static inline struct extent_record* to_extent_record(struct list_head *entry)
269 {
270         return container_of(entry, struct extent_record, list);
271 }
272
273 struct inode_backref {
274         struct list_head list;
275         unsigned int found_dir_item:1;
276         unsigned int found_dir_index:1;
277         unsigned int found_inode_ref:1;
278         unsigned int filetype:8;
279         int errors;
280         unsigned int ref_type;
281         u64 dir;
282         u64 index;
283         u16 namelen;
284         char name[0];
285 };
286
287 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
288 {
289         return list_entry(entry, struct inode_backref, list);
290 }
291
292 struct root_item_record {
293         struct list_head list;
294         u64 objectid;
295         u64 bytenr;
296         u64 last_snapshot;
297         u8 level;
298         u8 drop_level;
299         int level_size;
300         struct btrfs_key drop_key;
301 };
302
303 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
304 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
305 #define REF_ERR_NO_INODE_REF            (1 << 2)
306 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
307 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
308 #define REF_ERR_DUP_INODE_REF           (1 << 5)
309 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
310 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
311 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
312 #define REF_ERR_NO_ROOT_REF             (1 << 9)
313 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
314 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
315 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
316
317 struct file_extent_hole {
318         struct rb_node node;
319         u64 start;
320         u64 len;
321 };
322
323 struct inode_record {
324         struct list_head backrefs;
325         unsigned int checked:1;
326         unsigned int merging:1;
327         unsigned int found_inode_item:1;
328         unsigned int found_dir_item:1;
329         unsigned int found_file_extent:1;
330         unsigned int found_csum_item:1;
331         unsigned int some_csum_missing:1;
332         unsigned int nodatasum:1;
333         int errors;
334
335         u64 ino;
336         u32 nlink;
337         u32 imode;
338         u64 isize;
339         u64 nbytes;
340
341         u32 found_link;
342         u64 found_size;
343         u64 extent_start;
344         u64 extent_end;
345         struct rb_root holes;
346         struct list_head orphan_extents;
347
348         u32 refs;
349 };
350
351 #define I_ERR_NO_INODE_ITEM             (1 << 0)
352 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
353 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
354 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
355 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
356 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
357 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
358 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
359 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
360 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
361 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
362 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
363 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
364 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
365 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
366
367 struct root_backref {
368         struct list_head list;
369         unsigned int found_dir_item:1;
370         unsigned int found_dir_index:1;
371         unsigned int found_back_ref:1;
372         unsigned int found_forward_ref:1;
373         unsigned int reachable:1;
374         int errors;
375         u64 ref_root;
376         u64 dir;
377         u64 index;
378         u16 namelen;
379         char name[0];
380 };
381
382 static inline struct root_backref* to_root_backref(struct list_head *entry)
383 {
384         return list_entry(entry, struct root_backref, list);
385 }
386
387 struct root_record {
388         struct list_head backrefs;
389         struct cache_extent cache;
390         unsigned int found_root_item:1;
391         u64 objectid;
392         u32 found_ref;
393 };
394
395 struct ptr_node {
396         struct cache_extent cache;
397         void *data;
398 };
399
400 struct shared_node {
401         struct cache_extent cache;
402         struct cache_tree root_cache;
403         struct cache_tree inode_cache;
404         struct inode_record *current;
405         u32 refs;
406 };
407
408 struct block_info {
409         u64 start;
410         u32 size;
411 };
412
413 struct walk_control {
414         struct cache_tree shared;
415         struct shared_node *nodes[BTRFS_MAX_LEVEL];
416         int active_node;
417         int root_level;
418 };
419
420 struct bad_item {
421         struct btrfs_key key;
422         u64 root_id;
423         struct list_head list;
424 };
425
426 struct extent_entry {
427         u64 bytenr;
428         u64 bytes;
429         int count;
430         int broken;
431         struct list_head list;
432 };
433
434 struct root_item_info {
435         /* level of the root */
436         u8 level;
437         /* number of nodes at this level, must be 1 for a root */
438         int node_count;
439         u64 bytenr;
440         u64 gen;
441         struct cache_extent cache_extent;
442 };
443
444 static void *print_status_check(void *p)
445 {
446         struct task_ctx *priv = p;
447         const char work_indicator[] = { '.', 'o', 'O', 'o' };
448         uint32_t count = 0;
449         static char *task_position_string[] = {
450                 "checking extents",
451                 "checking free space cache",
452                 "checking fs roots",
453         };
454
455         task_period_start(priv->info, 1000 /* 1s */);
456
457         if (priv->tp == TASK_NOTHING)
458                 return NULL;
459
460         while (1) {
461                 printf("%s [%c]\r", task_position_string[priv->tp],
462                                 work_indicator[count % 4]);
463                 count++;
464                 fflush(stdout);
465                 task_period_wait(priv->info);
466         }
467         return NULL;
468 }
469
470 static int print_status_return(void *p)
471 {
472         printf("\n");
473         fflush(stdout);
474
475         return 0;
476 }
477
478 /* Compatible function to allow reuse of old codes */
479 static u64 first_extent_gap(struct rb_root *holes)
480 {
481         struct file_extent_hole *hole;
482
483         if (RB_EMPTY_ROOT(holes))
484                 return (u64)-1;
485
486         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
487         return hole->start;
488 }
489
490 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
491 {
492         struct file_extent_hole *hole1;
493         struct file_extent_hole *hole2;
494
495         hole1 = rb_entry(node1, struct file_extent_hole, node);
496         hole2 = rb_entry(node2, struct file_extent_hole, node);
497
498         if (hole1->start > hole2->start)
499                 return -1;
500         if (hole1->start < hole2->start)
501                 return 1;
502         /* Now hole1->start == hole2->start */
503         if (hole1->len >= hole2->len)
504                 /*
505                  * Hole 1 will be merge center
506                  * Same hole will be merged later
507                  */
508                 return -1;
509         /* Hole 2 will be merge center */
510         return 1;
511 }
512
513 /*
514  * Add a hole to the record
515  *
516  * This will do hole merge for copy_file_extent_holes(),
517  * which will ensure there won't be continuous holes.
518  */
519 static int add_file_extent_hole(struct rb_root *holes,
520                                 u64 start, u64 len)
521 {
522         struct file_extent_hole *hole;
523         struct file_extent_hole *prev = NULL;
524         struct file_extent_hole *next = NULL;
525
526         hole = malloc(sizeof(*hole));
527         if (!hole)
528                 return -ENOMEM;
529         hole->start = start;
530         hole->len = len;
531         /* Since compare will not return 0, no -EEXIST will happen */
532         rb_insert(holes, &hole->node, compare_hole);
533
534         /* simple merge with previous hole */
535         if (rb_prev(&hole->node))
536                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
537                                 node);
538         if (prev && prev->start + prev->len >= hole->start) {
539                 hole->len = hole->start + hole->len - prev->start;
540                 hole->start = prev->start;
541                 rb_erase(&prev->node, holes);
542                 free(prev);
543                 prev = NULL;
544         }
545
546         /* iterate merge with next holes */
547         while (1) {
548                 if (!rb_next(&hole->node))
549                         break;
550                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
551                                         node);
552                 if (hole->start + hole->len >= next->start) {
553                         if (hole->start + hole->len <= next->start + next->len)
554                                 hole->len = next->start + next->len -
555                                             hole->start;
556                         rb_erase(&next->node, holes);
557                         free(next);
558                         next = NULL;
559                 } else
560                         break;
561         }
562         return 0;
563 }
564
565 static int compare_hole_range(struct rb_node *node, void *data)
566 {
567         struct file_extent_hole *hole;
568         u64 start;
569
570         hole = (struct file_extent_hole *)data;
571         start = hole->start;
572
573         hole = rb_entry(node, struct file_extent_hole, node);
574         if (start < hole->start)
575                 return -1;
576         if (start >= hole->start && start < hole->start + hole->len)
577                 return 0;
578         return 1;
579 }
580
581 /*
582  * Delete a hole in the record
583  *
584  * This will do the hole split and is much restrict than add.
585  */
586 static int del_file_extent_hole(struct rb_root *holes,
587                                 u64 start, u64 len)
588 {
589         struct file_extent_hole *hole;
590         struct file_extent_hole tmp;
591         u64 prev_start = 0;
592         u64 prev_len = 0;
593         u64 next_start = 0;
594         u64 next_len = 0;
595         struct rb_node *node;
596         int have_prev = 0;
597         int have_next = 0;
598         int ret = 0;
599
600         tmp.start = start;
601         tmp.len = len;
602         node = rb_search(holes, &tmp, compare_hole_range, NULL);
603         if (!node)
604                 return -EEXIST;
605         hole = rb_entry(node, struct file_extent_hole, node);
606         if (start + len > hole->start + hole->len)
607                 return -EEXIST;
608
609         /*
610          * Now there will be no overlap, delete the hole and re-add the
611          * split(s) if they exists.
612          */
613         if (start > hole->start) {
614                 prev_start = hole->start;
615                 prev_len = start - hole->start;
616                 have_prev = 1;
617         }
618         if (hole->start + hole->len > start + len) {
619                 next_start = start + len;
620                 next_len = hole->start + hole->len - start - len;
621                 have_next = 1;
622         }
623         rb_erase(node, holes);
624         free(hole);
625         if (have_prev) {
626                 ret = add_file_extent_hole(holes, prev_start, prev_len);
627                 if (ret < 0)
628                         return ret;
629         }
630         if (have_next) {
631                 ret = add_file_extent_hole(holes, next_start, next_len);
632                 if (ret < 0)
633                         return ret;
634         }
635         return 0;
636 }
637
638 static int copy_file_extent_holes(struct rb_root *dst,
639                                   struct rb_root *src)
640 {
641         struct file_extent_hole *hole;
642         struct rb_node *node;
643         int ret = 0;
644
645         node = rb_first(src);
646         while (node) {
647                 hole = rb_entry(node, struct file_extent_hole, node);
648                 ret = add_file_extent_hole(dst, hole->start, hole->len);
649                 if (ret)
650                         break;
651                 node = rb_next(node);
652         }
653         return ret;
654 }
655
656 static void free_file_extent_holes(struct rb_root *holes)
657 {
658         struct rb_node *node;
659         struct file_extent_hole *hole;
660
661         node = rb_first(holes);
662         while (node) {
663                 hole = rb_entry(node, struct file_extent_hole, node);
664                 rb_erase(node, holes);
665                 free(hole);
666                 node = rb_first(holes);
667         }
668 }
669
670 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
671
672 static void record_root_in_trans(struct btrfs_trans_handle *trans,
673                                  struct btrfs_root *root)
674 {
675         if (root->last_trans != trans->transid) {
676                 root->track_dirty = 1;
677                 root->last_trans = trans->transid;
678                 root->commit_root = root->node;
679                 extent_buffer_get(root->node);
680         }
681 }
682
683 static u8 imode_to_type(u32 imode)
684 {
685 #define S_SHIFT 12
686         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
687                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
688                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
689                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
690                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
691                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
692                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
693                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
694         };
695
696         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
697 #undef S_SHIFT
698 }
699
700 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
701 {
702         struct device_record *rec1;
703         struct device_record *rec2;
704
705         rec1 = rb_entry(node1, struct device_record, node);
706         rec2 = rb_entry(node2, struct device_record, node);
707         if (rec1->devid > rec2->devid)
708                 return -1;
709         else if (rec1->devid < rec2->devid)
710                 return 1;
711         else
712                 return 0;
713 }
714
715 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
716 {
717         struct inode_record *rec;
718         struct inode_backref *backref;
719         struct inode_backref *orig;
720         struct inode_backref *tmp;
721         struct orphan_data_extent *src_orphan;
722         struct orphan_data_extent *dst_orphan;
723         size_t size;
724         int ret;
725
726         rec = malloc(sizeof(*rec));
727         if (!rec)
728                 return ERR_PTR(-ENOMEM);
729         memcpy(rec, orig_rec, sizeof(*rec));
730         rec->refs = 1;
731         INIT_LIST_HEAD(&rec->backrefs);
732         INIT_LIST_HEAD(&rec->orphan_extents);
733         rec->holes = RB_ROOT;
734
735         list_for_each_entry(orig, &orig_rec->backrefs, list) {
736                 size = sizeof(*orig) + orig->namelen + 1;
737                 backref = malloc(size);
738                 if (!backref) {
739                         ret = -ENOMEM;
740                         goto cleanup;
741                 }
742                 memcpy(backref, orig, size);
743                 list_add_tail(&backref->list, &rec->backrefs);
744         }
745         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
746                 dst_orphan = malloc(sizeof(*dst_orphan));
747                 if (!dst_orphan) {
748                         ret = -ENOMEM;
749                         goto cleanup;
750                 }
751                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
752                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
753         }
754         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
755         BUG_ON(ret < 0);
756
757         return rec;
758
759 cleanup:
760         if (!list_empty(&rec->backrefs))
761                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
762                         list_del(&orig->list);
763                         free(orig);
764                 }
765
766         if (!list_empty(&rec->orphan_extents))
767                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
768                         list_del(&orig->list);
769                         free(orig);
770                 }
771
772         free(rec);
773
774         return ERR_PTR(ret);
775 }
776
777 static void print_orphan_data_extents(struct list_head *orphan_extents,
778                                       u64 objectid)
779 {
780         struct orphan_data_extent *orphan;
781
782         if (list_empty(orphan_extents))
783                 return;
784         printf("The following data extent is lost in tree %llu:\n",
785                objectid);
786         list_for_each_entry(orphan, orphan_extents, list) {
787                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
788                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
789                        orphan->disk_len);
790         }
791 }
792
793 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
794 {
795         u64 root_objectid = root->root_key.objectid;
796         int errors = rec->errors;
797
798         if (!errors)
799                 return;
800         /* reloc root errors, we print its corresponding fs root objectid*/
801         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
802                 root_objectid = root->root_key.offset;
803                 fprintf(stderr, "reloc");
804         }
805         fprintf(stderr, "root %llu inode %llu errors %x",
806                 (unsigned long long) root_objectid,
807                 (unsigned long long) rec->ino, rec->errors);
808
809         if (errors & I_ERR_NO_INODE_ITEM)
810                 fprintf(stderr, ", no inode item");
811         if (errors & I_ERR_NO_ORPHAN_ITEM)
812                 fprintf(stderr, ", no orphan item");
813         if (errors & I_ERR_DUP_INODE_ITEM)
814                 fprintf(stderr, ", dup inode item");
815         if (errors & I_ERR_DUP_DIR_INDEX)
816                 fprintf(stderr, ", dup dir index");
817         if (errors & I_ERR_ODD_DIR_ITEM)
818                 fprintf(stderr, ", odd dir item");
819         if (errors & I_ERR_ODD_FILE_EXTENT)
820                 fprintf(stderr, ", odd file extent");
821         if (errors & I_ERR_BAD_FILE_EXTENT)
822                 fprintf(stderr, ", bad file extent");
823         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
824                 fprintf(stderr, ", file extent overlap");
825         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
826                 fprintf(stderr, ", file extent discount");
827         if (errors & I_ERR_DIR_ISIZE_WRONG)
828                 fprintf(stderr, ", dir isize wrong");
829         if (errors & I_ERR_FILE_NBYTES_WRONG)
830                 fprintf(stderr, ", nbytes wrong");
831         if (errors & I_ERR_ODD_CSUM_ITEM)
832                 fprintf(stderr, ", odd csum item");
833         if (errors & I_ERR_SOME_CSUM_MISSING)
834                 fprintf(stderr, ", some csum missing");
835         if (errors & I_ERR_LINK_COUNT_WRONG)
836                 fprintf(stderr, ", link count wrong");
837         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
838                 fprintf(stderr, ", orphan file extent");
839         fprintf(stderr, "\n");
840         /* Print the orphan extents if needed */
841         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
842                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
843
844         /* Print the holes if needed */
845         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
846                 struct file_extent_hole *hole;
847                 struct rb_node *node;
848                 int found = 0;
849
850                 node = rb_first(&rec->holes);
851                 fprintf(stderr, "Found file extent holes:\n");
852                 while (node) {
853                         found = 1;
854                         hole = rb_entry(node, struct file_extent_hole, node);
855                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
856                                 hole->start, hole->len);
857                         node = rb_next(node);
858                 }
859                 if (!found)
860                         fprintf(stderr, "\tstart: 0, len: %llu\n",
861                                 round_up(rec->isize, root->sectorsize));
862         }
863 }
864
865 static void print_ref_error(int errors)
866 {
867         if (errors & REF_ERR_NO_DIR_ITEM)
868                 fprintf(stderr, ", no dir item");
869         if (errors & REF_ERR_NO_DIR_INDEX)
870                 fprintf(stderr, ", no dir index");
871         if (errors & REF_ERR_NO_INODE_REF)
872                 fprintf(stderr, ", no inode ref");
873         if (errors & REF_ERR_DUP_DIR_ITEM)
874                 fprintf(stderr, ", dup dir item");
875         if (errors & REF_ERR_DUP_DIR_INDEX)
876                 fprintf(stderr, ", dup dir index");
877         if (errors & REF_ERR_DUP_INODE_REF)
878                 fprintf(stderr, ", dup inode ref");
879         if (errors & REF_ERR_INDEX_UNMATCH)
880                 fprintf(stderr, ", index mismatch");
881         if (errors & REF_ERR_FILETYPE_UNMATCH)
882                 fprintf(stderr, ", filetype mismatch");
883         if (errors & REF_ERR_NAME_TOO_LONG)
884                 fprintf(stderr, ", name too long");
885         if (errors & REF_ERR_NO_ROOT_REF)
886                 fprintf(stderr, ", no root ref");
887         if (errors & REF_ERR_NO_ROOT_BACKREF)
888                 fprintf(stderr, ", no root backref");
889         if (errors & REF_ERR_DUP_ROOT_REF)
890                 fprintf(stderr, ", dup root ref");
891         if (errors & REF_ERR_DUP_ROOT_BACKREF)
892                 fprintf(stderr, ", dup root backref");
893         fprintf(stderr, "\n");
894 }
895
896 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
897                                           u64 ino, int mod)
898 {
899         struct ptr_node *node;
900         struct cache_extent *cache;
901         struct inode_record *rec = NULL;
902         int ret;
903
904         cache = lookup_cache_extent(inode_cache, ino, 1);
905         if (cache) {
906                 node = container_of(cache, struct ptr_node, cache);
907                 rec = node->data;
908                 if (mod && rec->refs > 1) {
909                         node->data = clone_inode_rec(rec);
910                         if (IS_ERR(node->data))
911                                 return node->data;
912                         rec->refs--;
913                         rec = node->data;
914                 }
915         } else if (mod) {
916                 rec = calloc(1, sizeof(*rec));
917                 if (!rec)
918                         return ERR_PTR(-ENOMEM);
919                 rec->ino = ino;
920                 rec->extent_start = (u64)-1;
921                 rec->refs = 1;
922                 INIT_LIST_HEAD(&rec->backrefs);
923                 INIT_LIST_HEAD(&rec->orphan_extents);
924                 rec->holes = RB_ROOT;
925
926                 node = malloc(sizeof(*node));
927                 if (!node) {
928                         free(rec);
929                         return ERR_PTR(-ENOMEM);
930                 }
931                 node->cache.start = ino;
932                 node->cache.size = 1;
933                 node->data = rec;
934
935                 if (ino == BTRFS_FREE_INO_OBJECTID)
936                         rec->found_link = 1;
937
938                 ret = insert_cache_extent(inode_cache, &node->cache);
939                 if (ret)
940                         return ERR_PTR(-EEXIST);
941         }
942         return rec;
943 }
944
945 static void free_orphan_data_extents(struct list_head *orphan_extents)
946 {
947         struct orphan_data_extent *orphan;
948
949         while (!list_empty(orphan_extents)) {
950                 orphan = list_entry(orphan_extents->next,
951                                     struct orphan_data_extent, list);
952                 list_del(&orphan->list);
953                 free(orphan);
954         }
955 }
956
957 static void free_inode_rec(struct inode_record *rec)
958 {
959         struct inode_backref *backref;
960
961         if (--rec->refs > 0)
962                 return;
963
964         while (!list_empty(&rec->backrefs)) {
965                 backref = to_inode_backref(rec->backrefs.next);
966                 list_del(&backref->list);
967                 free(backref);
968         }
969         free_orphan_data_extents(&rec->orphan_extents);
970         free_file_extent_holes(&rec->holes);
971         free(rec);
972 }
973
974 static int can_free_inode_rec(struct inode_record *rec)
975 {
976         if (!rec->errors && rec->checked && rec->found_inode_item &&
977             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
978                 return 1;
979         return 0;
980 }
981
982 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
983                                  struct inode_record *rec)
984 {
985         struct cache_extent *cache;
986         struct inode_backref *tmp, *backref;
987         struct ptr_node *node;
988         unsigned char filetype;
989
990         if (!rec->found_inode_item)
991                 return;
992
993         filetype = imode_to_type(rec->imode);
994         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
995                 if (backref->found_dir_item && backref->found_dir_index) {
996                         if (backref->filetype != filetype)
997                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
998                         if (!backref->errors && backref->found_inode_ref &&
999                             rec->nlink == rec->found_link) {
1000                                 list_del(&backref->list);
1001                                 free(backref);
1002                         }
1003                 }
1004         }
1005
1006         if (!rec->checked || rec->merging)
1007                 return;
1008
1009         if (S_ISDIR(rec->imode)) {
1010                 if (rec->found_size != rec->isize)
1011                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1012                 if (rec->found_file_extent)
1013                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1014         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1015                 if (rec->found_dir_item)
1016                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1017                 if (rec->found_size != rec->nbytes)
1018                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1019                 if (rec->nlink > 0 && !no_holes &&
1020                     (rec->extent_end < rec->isize ||
1021                      first_extent_gap(&rec->holes) < rec->isize))
1022                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1023         }
1024
1025         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1026                 if (rec->found_csum_item && rec->nodatasum)
1027                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1028                 if (rec->some_csum_missing && !rec->nodatasum)
1029                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1030         }
1031
1032         BUG_ON(rec->refs != 1);
1033         if (can_free_inode_rec(rec)) {
1034                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1035                 node = container_of(cache, struct ptr_node, cache);
1036                 BUG_ON(node->data != rec);
1037                 remove_cache_extent(inode_cache, &node->cache);
1038                 free(node);
1039                 free_inode_rec(rec);
1040         }
1041 }
1042
1043 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1044 {
1045         struct btrfs_path path;
1046         struct btrfs_key key;
1047         int ret;
1048
1049         key.objectid = BTRFS_ORPHAN_OBJECTID;
1050         key.type = BTRFS_ORPHAN_ITEM_KEY;
1051         key.offset = ino;
1052
1053         btrfs_init_path(&path);
1054         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1055         btrfs_release_path(&path);
1056         if (ret > 0)
1057                 ret = -ENOENT;
1058         return ret;
1059 }
1060
1061 static int process_inode_item(struct extent_buffer *eb,
1062                               int slot, struct btrfs_key *key,
1063                               struct shared_node *active_node)
1064 {
1065         struct inode_record *rec;
1066         struct btrfs_inode_item *item;
1067
1068         rec = active_node->current;
1069         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1070         if (rec->found_inode_item) {
1071                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1072                 return 1;
1073         }
1074         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1075         rec->nlink = btrfs_inode_nlink(eb, item);
1076         rec->isize = btrfs_inode_size(eb, item);
1077         rec->nbytes = btrfs_inode_nbytes(eb, item);
1078         rec->imode = btrfs_inode_mode(eb, item);
1079         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1080                 rec->nodatasum = 1;
1081         rec->found_inode_item = 1;
1082         if (rec->nlink == 0)
1083                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1084         maybe_free_inode_rec(&active_node->inode_cache, rec);
1085         return 0;
1086 }
1087
1088 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1089                                                 const char *name,
1090                                                 int namelen, u64 dir)
1091 {
1092         struct inode_backref *backref;
1093
1094         list_for_each_entry(backref, &rec->backrefs, list) {
1095                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1096                         break;
1097                 if (backref->dir != dir || backref->namelen != namelen)
1098                         continue;
1099                 if (memcmp(name, backref->name, namelen))
1100                         continue;
1101                 return backref;
1102         }
1103
1104         backref = malloc(sizeof(*backref) + namelen + 1);
1105         if (!backref)
1106                 return NULL;
1107         memset(backref, 0, sizeof(*backref));
1108         backref->dir = dir;
1109         backref->namelen = namelen;
1110         memcpy(backref->name, name, namelen);
1111         backref->name[namelen] = '\0';
1112         list_add_tail(&backref->list, &rec->backrefs);
1113         return backref;
1114 }
1115
1116 static int add_inode_backref(struct cache_tree *inode_cache,
1117                              u64 ino, u64 dir, u64 index,
1118                              const char *name, int namelen,
1119                              int filetype, int itemtype, int errors)
1120 {
1121         struct inode_record *rec;
1122         struct inode_backref *backref;
1123
1124         rec = get_inode_rec(inode_cache, ino, 1);
1125         BUG_ON(IS_ERR(rec));
1126         backref = get_inode_backref(rec, name, namelen, dir);
1127         BUG_ON(!backref);
1128         if (errors)
1129                 backref->errors |= errors;
1130         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1131                 if (backref->found_dir_index)
1132                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1133                 if (backref->found_inode_ref && backref->index != index)
1134                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1135                 if (backref->found_dir_item && backref->filetype != filetype)
1136                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1137
1138                 backref->index = index;
1139                 backref->filetype = filetype;
1140                 backref->found_dir_index = 1;
1141         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1142                 rec->found_link++;
1143                 if (backref->found_dir_item)
1144                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1145                 if (backref->found_dir_index && backref->filetype != filetype)
1146                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1147
1148                 backref->filetype = filetype;
1149                 backref->found_dir_item = 1;
1150         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1151                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1152                 if (backref->found_inode_ref)
1153                         backref->errors |= REF_ERR_DUP_INODE_REF;
1154                 if (backref->found_dir_index && backref->index != index)
1155                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1156                 else
1157                         backref->index = index;
1158
1159                 backref->ref_type = itemtype;
1160                 backref->found_inode_ref = 1;
1161         } else {
1162                 BUG_ON(1);
1163         }
1164
1165         maybe_free_inode_rec(inode_cache, rec);
1166         return 0;
1167 }
1168
1169 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1170                             struct cache_tree *dst_cache)
1171 {
1172         struct inode_backref *backref;
1173         u32 dir_count = 0;
1174         int ret = 0;
1175
1176         dst->merging = 1;
1177         list_for_each_entry(backref, &src->backrefs, list) {
1178                 if (backref->found_dir_index) {
1179                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1180                                         backref->index, backref->name,
1181                                         backref->namelen, backref->filetype,
1182                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1183                 }
1184                 if (backref->found_dir_item) {
1185                         dir_count++;
1186                         add_inode_backref(dst_cache, dst->ino,
1187                                         backref->dir, 0, backref->name,
1188                                         backref->namelen, backref->filetype,
1189                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1190                 }
1191                 if (backref->found_inode_ref) {
1192                         add_inode_backref(dst_cache, dst->ino,
1193                                         backref->dir, backref->index,
1194                                         backref->name, backref->namelen, 0,
1195                                         backref->ref_type, backref->errors);
1196                 }
1197         }
1198
1199         if (src->found_dir_item)
1200                 dst->found_dir_item = 1;
1201         if (src->found_file_extent)
1202                 dst->found_file_extent = 1;
1203         if (src->found_csum_item)
1204                 dst->found_csum_item = 1;
1205         if (src->some_csum_missing)
1206                 dst->some_csum_missing = 1;
1207         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1208                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1209                 if (ret < 0)
1210                         return ret;
1211         }
1212
1213         BUG_ON(src->found_link < dir_count);
1214         dst->found_link += src->found_link - dir_count;
1215         dst->found_size += src->found_size;
1216         if (src->extent_start != (u64)-1) {
1217                 if (dst->extent_start == (u64)-1) {
1218                         dst->extent_start = src->extent_start;
1219                         dst->extent_end = src->extent_end;
1220                 } else {
1221                         if (dst->extent_end > src->extent_start)
1222                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1223                         else if (dst->extent_end < src->extent_start) {
1224                                 ret = add_file_extent_hole(&dst->holes,
1225                                         dst->extent_end,
1226                                         src->extent_start - dst->extent_end);
1227                         }
1228                         if (dst->extent_end < src->extent_end)
1229                                 dst->extent_end = src->extent_end;
1230                 }
1231         }
1232
1233         dst->errors |= src->errors;
1234         if (src->found_inode_item) {
1235                 if (!dst->found_inode_item) {
1236                         dst->nlink = src->nlink;
1237                         dst->isize = src->isize;
1238                         dst->nbytes = src->nbytes;
1239                         dst->imode = src->imode;
1240                         dst->nodatasum = src->nodatasum;
1241                         dst->found_inode_item = 1;
1242                 } else {
1243                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1244                 }
1245         }
1246         dst->merging = 0;
1247
1248         return 0;
1249 }
1250
1251 static int splice_shared_node(struct shared_node *src_node,
1252                               struct shared_node *dst_node)
1253 {
1254         struct cache_extent *cache;
1255         struct ptr_node *node, *ins;
1256         struct cache_tree *src, *dst;
1257         struct inode_record *rec, *conflict;
1258         u64 current_ino = 0;
1259         int splice = 0;
1260         int ret;
1261
1262         if (--src_node->refs == 0)
1263                 splice = 1;
1264         if (src_node->current)
1265                 current_ino = src_node->current->ino;
1266
1267         src = &src_node->root_cache;
1268         dst = &dst_node->root_cache;
1269 again:
1270         cache = search_cache_extent(src, 0);
1271         while (cache) {
1272                 node = container_of(cache, struct ptr_node, cache);
1273                 rec = node->data;
1274                 cache = next_cache_extent(cache);
1275
1276                 if (splice) {
1277                         remove_cache_extent(src, &node->cache);
1278                         ins = node;
1279                 } else {
1280                         ins = malloc(sizeof(*ins));
1281                         BUG_ON(!ins);
1282                         ins->cache.start = node->cache.start;
1283                         ins->cache.size = node->cache.size;
1284                         ins->data = rec;
1285                         rec->refs++;
1286                 }
1287                 ret = insert_cache_extent(dst, &ins->cache);
1288                 if (ret == -EEXIST) {
1289                         conflict = get_inode_rec(dst, rec->ino, 1);
1290                         BUG_ON(IS_ERR(conflict));
1291                         merge_inode_recs(rec, conflict, dst);
1292                         if (rec->checked) {
1293                                 conflict->checked = 1;
1294                                 if (dst_node->current == conflict)
1295                                         dst_node->current = NULL;
1296                         }
1297                         maybe_free_inode_rec(dst, conflict);
1298                         free_inode_rec(rec);
1299                         free(ins);
1300                 } else {
1301                         BUG_ON(ret);
1302                 }
1303         }
1304
1305         if (src == &src_node->root_cache) {
1306                 src = &src_node->inode_cache;
1307                 dst = &dst_node->inode_cache;
1308                 goto again;
1309         }
1310
1311         if (current_ino > 0 && (!dst_node->current ||
1312             current_ino > dst_node->current->ino)) {
1313                 if (dst_node->current) {
1314                         dst_node->current->checked = 1;
1315                         maybe_free_inode_rec(dst, dst_node->current);
1316                 }
1317                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1318                 BUG_ON(IS_ERR(dst_node->current));
1319         }
1320         return 0;
1321 }
1322
1323 static void free_inode_ptr(struct cache_extent *cache)
1324 {
1325         struct ptr_node *node;
1326         struct inode_record *rec;
1327
1328         node = container_of(cache, struct ptr_node, cache);
1329         rec = node->data;
1330         free_inode_rec(rec);
1331         free(node);
1332 }
1333
1334 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1335
1336 static struct shared_node *find_shared_node(struct cache_tree *shared,
1337                                             u64 bytenr)
1338 {
1339         struct cache_extent *cache;
1340         struct shared_node *node;
1341
1342         cache = lookup_cache_extent(shared, bytenr, 1);
1343         if (cache) {
1344                 node = container_of(cache, struct shared_node, cache);
1345                 return node;
1346         }
1347         return NULL;
1348 }
1349
1350 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1351 {
1352         int ret;
1353         struct shared_node *node;
1354
1355         node = calloc(1, sizeof(*node));
1356         if (!node)
1357                 return -ENOMEM;
1358         node->cache.start = bytenr;
1359         node->cache.size = 1;
1360         cache_tree_init(&node->root_cache);
1361         cache_tree_init(&node->inode_cache);
1362         node->refs = refs;
1363
1364         ret = insert_cache_extent(shared, &node->cache);
1365
1366         return ret;
1367 }
1368
1369 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1370                              struct walk_control *wc, int level)
1371 {
1372         struct shared_node *node;
1373         struct shared_node *dest;
1374         int ret;
1375
1376         if (level == wc->active_node)
1377                 return 0;
1378
1379         BUG_ON(wc->active_node <= level);
1380         node = find_shared_node(&wc->shared, bytenr);
1381         if (!node) {
1382                 ret = add_shared_node(&wc->shared, bytenr, refs);
1383                 BUG_ON(ret);
1384                 node = find_shared_node(&wc->shared, bytenr);
1385                 wc->nodes[level] = node;
1386                 wc->active_node = level;
1387                 return 0;
1388         }
1389
1390         if (wc->root_level == wc->active_node &&
1391             btrfs_root_refs(&root->root_item) == 0) {
1392                 if (--node->refs == 0) {
1393                         free_inode_recs_tree(&node->root_cache);
1394                         free_inode_recs_tree(&node->inode_cache);
1395                         remove_cache_extent(&wc->shared, &node->cache);
1396                         free(node);
1397                 }
1398                 return 1;
1399         }
1400
1401         dest = wc->nodes[wc->active_node];
1402         splice_shared_node(node, dest);
1403         if (node->refs == 0) {
1404                 remove_cache_extent(&wc->shared, &node->cache);
1405                 free(node);
1406         }
1407         return 1;
1408 }
1409
1410 static int leave_shared_node(struct btrfs_root *root,
1411                              struct walk_control *wc, int level)
1412 {
1413         struct shared_node *node;
1414         struct shared_node *dest;
1415         int i;
1416
1417         if (level == wc->root_level)
1418                 return 0;
1419
1420         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1421                 if (wc->nodes[i])
1422                         break;
1423         }
1424         BUG_ON(i >= BTRFS_MAX_LEVEL);
1425
1426         node = wc->nodes[wc->active_node];
1427         wc->nodes[wc->active_node] = NULL;
1428         wc->active_node = i;
1429
1430         dest = wc->nodes[wc->active_node];
1431         if (wc->active_node < wc->root_level ||
1432             btrfs_root_refs(&root->root_item) > 0) {
1433                 BUG_ON(node->refs <= 1);
1434                 splice_shared_node(node, dest);
1435         } else {
1436                 BUG_ON(node->refs < 2);
1437                 node->refs--;
1438         }
1439         return 0;
1440 }
1441
1442 /*
1443  * Returns:
1444  * < 0 - on error
1445  * 1   - if the root with id child_root_id is a child of root parent_root_id
1446  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1447  *       has other root(s) as parent(s)
1448  * 2   - if the root child_root_id doesn't have any parent roots
1449  */
1450 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1451                          u64 child_root_id)
1452 {
1453         struct btrfs_path path;
1454         struct btrfs_key key;
1455         struct extent_buffer *leaf;
1456         int has_parent = 0;
1457         int ret;
1458
1459         btrfs_init_path(&path);
1460
1461         key.objectid = parent_root_id;
1462         key.type = BTRFS_ROOT_REF_KEY;
1463         key.offset = child_root_id;
1464         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1465                                 0, 0);
1466         if (ret < 0)
1467                 return ret;
1468         btrfs_release_path(&path);
1469         if (!ret)
1470                 return 1;
1471
1472         key.objectid = child_root_id;
1473         key.type = BTRFS_ROOT_BACKREF_KEY;
1474         key.offset = 0;
1475         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1476                                 0, 0);
1477         if (ret < 0)
1478                 goto out;
1479
1480         while (1) {
1481                 leaf = path.nodes[0];
1482                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1483                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1484                         if (ret)
1485                                 break;
1486                         leaf = path.nodes[0];
1487                 }
1488
1489                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1490                 if (key.objectid != child_root_id ||
1491                     key.type != BTRFS_ROOT_BACKREF_KEY)
1492                         break;
1493
1494                 has_parent = 1;
1495
1496                 if (key.offset == parent_root_id) {
1497                         btrfs_release_path(&path);
1498                         return 1;
1499                 }
1500
1501                 path.slots[0]++;
1502         }
1503 out:
1504         btrfs_release_path(&path);
1505         if (ret < 0)
1506                 return ret;
1507         return has_parent ? 0 : 2;
1508 }
1509
1510 static int process_dir_item(struct btrfs_root *root,
1511                             struct extent_buffer *eb,
1512                             int slot, struct btrfs_key *key,
1513                             struct shared_node *active_node)
1514 {
1515         u32 total;
1516         u32 cur = 0;
1517         u32 len;
1518         u32 name_len;
1519         u32 data_len;
1520         int error;
1521         int nritems = 0;
1522         int filetype;
1523         struct btrfs_dir_item *di;
1524         struct inode_record *rec;
1525         struct cache_tree *root_cache;
1526         struct cache_tree *inode_cache;
1527         struct btrfs_key location;
1528         char namebuf[BTRFS_NAME_LEN];
1529
1530         root_cache = &active_node->root_cache;
1531         inode_cache = &active_node->inode_cache;
1532         rec = active_node->current;
1533         rec->found_dir_item = 1;
1534
1535         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1536         total = btrfs_item_size_nr(eb, slot);
1537         while (cur < total) {
1538                 nritems++;
1539                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1540                 name_len = btrfs_dir_name_len(eb, di);
1541                 data_len = btrfs_dir_data_len(eb, di);
1542                 filetype = btrfs_dir_type(eb, di);
1543
1544                 rec->found_size += name_len;
1545                 if (name_len <= BTRFS_NAME_LEN) {
1546                         len = name_len;
1547                         error = 0;
1548                 } else {
1549                         len = BTRFS_NAME_LEN;
1550                         error = REF_ERR_NAME_TOO_LONG;
1551                 }
1552                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1553
1554                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1555                         add_inode_backref(inode_cache, location.objectid,
1556                                           key->objectid, key->offset, namebuf,
1557                                           len, filetype, key->type, error);
1558                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1559                         add_inode_backref(root_cache, location.objectid,
1560                                           key->objectid, key->offset,
1561                                           namebuf, len, filetype,
1562                                           key->type, error);
1563                 } else {
1564                         fprintf(stderr, "invalid location in dir item %u\n",
1565                                 location.type);
1566                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1567                                           key->objectid, key->offset, namebuf,
1568                                           len, filetype, key->type, error);
1569                 }
1570
1571                 len = sizeof(*di) + name_len + data_len;
1572                 di = (struct btrfs_dir_item *)((char *)di + len);
1573                 cur += len;
1574         }
1575         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1576                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1577
1578         return 0;
1579 }
1580
1581 static int process_inode_ref(struct extent_buffer *eb,
1582                              int slot, struct btrfs_key *key,
1583                              struct shared_node *active_node)
1584 {
1585         u32 total;
1586         u32 cur = 0;
1587         u32 len;
1588         u32 name_len;
1589         u64 index;
1590         int error;
1591         struct cache_tree *inode_cache;
1592         struct btrfs_inode_ref *ref;
1593         char namebuf[BTRFS_NAME_LEN];
1594
1595         inode_cache = &active_node->inode_cache;
1596
1597         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1598         total = btrfs_item_size_nr(eb, slot);
1599         while (cur < total) {
1600                 name_len = btrfs_inode_ref_name_len(eb, ref);
1601                 index = btrfs_inode_ref_index(eb, ref);
1602                 if (name_len <= BTRFS_NAME_LEN) {
1603                         len = name_len;
1604                         error = 0;
1605                 } else {
1606                         len = BTRFS_NAME_LEN;
1607                         error = REF_ERR_NAME_TOO_LONG;
1608                 }
1609                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1610                 add_inode_backref(inode_cache, key->objectid, key->offset,
1611                                   index, namebuf, len, 0, key->type, error);
1612
1613                 len = sizeof(*ref) + name_len;
1614                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1615                 cur += len;
1616         }
1617         return 0;
1618 }
1619
1620 static int process_inode_extref(struct extent_buffer *eb,
1621                                 int slot, struct btrfs_key *key,
1622                                 struct shared_node *active_node)
1623 {
1624         u32 total;
1625         u32 cur = 0;
1626         u32 len;
1627         u32 name_len;
1628         u64 index;
1629         u64 parent;
1630         int error;
1631         struct cache_tree *inode_cache;
1632         struct btrfs_inode_extref *extref;
1633         char namebuf[BTRFS_NAME_LEN];
1634
1635         inode_cache = &active_node->inode_cache;
1636
1637         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1638         total = btrfs_item_size_nr(eb, slot);
1639         while (cur < total) {
1640                 name_len = btrfs_inode_extref_name_len(eb, extref);
1641                 index = btrfs_inode_extref_index(eb, extref);
1642                 parent = btrfs_inode_extref_parent(eb, extref);
1643                 if (name_len <= BTRFS_NAME_LEN) {
1644                         len = name_len;
1645                         error = 0;
1646                 } else {
1647                         len = BTRFS_NAME_LEN;
1648                         error = REF_ERR_NAME_TOO_LONG;
1649                 }
1650                 read_extent_buffer(eb, namebuf,
1651                                    (unsigned long)(extref + 1), len);
1652                 add_inode_backref(inode_cache, key->objectid, parent,
1653                                   index, namebuf, len, 0, key->type, error);
1654
1655                 len = sizeof(*extref) + name_len;
1656                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1657                 cur += len;
1658         }
1659         return 0;
1660
1661 }
1662
1663 static int count_csum_range(struct btrfs_root *root, u64 start,
1664                             u64 len, u64 *found)
1665 {
1666         struct btrfs_key key;
1667         struct btrfs_path path;
1668         struct extent_buffer *leaf;
1669         int ret;
1670         size_t size;
1671         *found = 0;
1672         u64 csum_end;
1673         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1674
1675         btrfs_init_path(&path);
1676
1677         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1678         key.offset = start;
1679         key.type = BTRFS_EXTENT_CSUM_KEY;
1680
1681         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1682                                 &key, &path, 0, 0);
1683         if (ret < 0)
1684                 goto out;
1685         if (ret > 0 && path.slots[0] > 0) {
1686                 leaf = path.nodes[0];
1687                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1688                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1689                     key.type == BTRFS_EXTENT_CSUM_KEY)
1690                         path.slots[0]--;
1691         }
1692
1693         while (len > 0) {
1694                 leaf = path.nodes[0];
1695                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1696                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1697                         if (ret > 0)
1698                                 break;
1699                         else if (ret < 0)
1700                                 goto out;
1701                         leaf = path.nodes[0];
1702                 }
1703
1704                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1705                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1706                     key.type != BTRFS_EXTENT_CSUM_KEY)
1707                         break;
1708
1709                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1710                 if (key.offset >= start + len)
1711                         break;
1712
1713                 if (key.offset > start)
1714                         start = key.offset;
1715
1716                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1717                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1718                 if (csum_end > start) {
1719                         size = min(csum_end - start, len);
1720                         len -= size;
1721                         start += size;
1722                         *found += size;
1723                 }
1724
1725                 path.slots[0]++;
1726         }
1727 out:
1728         btrfs_release_path(&path);
1729         if (ret < 0)
1730                 return ret;
1731         return 0;
1732 }
1733
1734 static int process_file_extent(struct btrfs_root *root,
1735                                 struct extent_buffer *eb,
1736                                 int slot, struct btrfs_key *key,
1737                                 struct shared_node *active_node)
1738 {
1739         struct inode_record *rec;
1740         struct btrfs_file_extent_item *fi;
1741         u64 num_bytes = 0;
1742         u64 disk_bytenr = 0;
1743         u64 extent_offset = 0;
1744         u64 mask = root->sectorsize - 1;
1745         int extent_type;
1746         int ret;
1747
1748         rec = active_node->current;
1749         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1750         rec->found_file_extent = 1;
1751
1752         if (rec->extent_start == (u64)-1) {
1753                 rec->extent_start = key->offset;
1754                 rec->extent_end = key->offset;
1755         }
1756
1757         if (rec->extent_end > key->offset)
1758                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1759         else if (rec->extent_end < key->offset) {
1760                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1761                                            key->offset - rec->extent_end);
1762                 if (ret < 0)
1763                         return ret;
1764         }
1765
1766         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1767         extent_type = btrfs_file_extent_type(eb, fi);
1768
1769         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1770                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1771                 if (num_bytes == 0)
1772                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1773                 rec->found_size += num_bytes;
1774                 num_bytes = (num_bytes + mask) & ~mask;
1775         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1776                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1777                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1778                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1779                 extent_offset = btrfs_file_extent_offset(eb, fi);
1780                 if (num_bytes == 0 || (num_bytes & mask))
1781                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1782                 if (num_bytes + extent_offset >
1783                     btrfs_file_extent_ram_bytes(eb, fi))
1784                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1785                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1786                     (btrfs_file_extent_compression(eb, fi) ||
1787                      btrfs_file_extent_encryption(eb, fi) ||
1788                      btrfs_file_extent_other_encoding(eb, fi)))
1789                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1790                 if (disk_bytenr > 0)
1791                         rec->found_size += num_bytes;
1792         } else {
1793                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1794         }
1795         rec->extent_end = key->offset + num_bytes;
1796
1797         /*
1798          * The data reloc tree will copy full extents into its inode and then
1799          * copy the corresponding csums.  Because the extent it copied could be
1800          * a preallocated extent that hasn't been written to yet there may be no
1801          * csums to copy, ergo we won't have csums for our file extent.  This is
1802          * ok so just don't bother checking csums if the inode belongs to the
1803          * data reloc tree.
1804          */
1805         if (disk_bytenr > 0 &&
1806             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1807                 u64 found;
1808                 if (btrfs_file_extent_compression(eb, fi))
1809                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1810                 else
1811                         disk_bytenr += extent_offset;
1812
1813                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1814                 if (ret < 0)
1815                         return ret;
1816                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1817                         if (found > 0)
1818                                 rec->found_csum_item = 1;
1819                         if (found < num_bytes)
1820                                 rec->some_csum_missing = 1;
1821                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1822                         if (found > 0)
1823                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1824                 }
1825         }
1826         return 0;
1827 }
1828
1829 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1830                             struct walk_control *wc)
1831 {
1832         struct btrfs_key key;
1833         u32 nritems;
1834         int i;
1835         int ret = 0;
1836         struct cache_tree *inode_cache;
1837         struct shared_node *active_node;
1838
1839         if (wc->root_level == wc->active_node &&
1840             btrfs_root_refs(&root->root_item) == 0)
1841                 return 0;
1842
1843         active_node = wc->nodes[wc->active_node];
1844         inode_cache = &active_node->inode_cache;
1845         nritems = btrfs_header_nritems(eb);
1846         for (i = 0; i < nritems; i++) {
1847                 btrfs_item_key_to_cpu(eb, &key, i);
1848
1849                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1850                         continue;
1851                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1852                         continue;
1853
1854                 if (active_node->current == NULL ||
1855                     active_node->current->ino < key.objectid) {
1856                         if (active_node->current) {
1857                                 active_node->current->checked = 1;
1858                                 maybe_free_inode_rec(inode_cache,
1859                                                      active_node->current);
1860                         }
1861                         active_node->current = get_inode_rec(inode_cache,
1862                                                              key.objectid, 1);
1863                         BUG_ON(IS_ERR(active_node->current));
1864                 }
1865                 switch (key.type) {
1866                 case BTRFS_DIR_ITEM_KEY:
1867                 case BTRFS_DIR_INDEX_KEY:
1868                         ret = process_dir_item(root, eb, i, &key, active_node);
1869                         break;
1870                 case BTRFS_INODE_REF_KEY:
1871                         ret = process_inode_ref(eb, i, &key, active_node);
1872                         break;
1873                 case BTRFS_INODE_EXTREF_KEY:
1874                         ret = process_inode_extref(eb, i, &key, active_node);
1875                         break;
1876                 case BTRFS_INODE_ITEM_KEY:
1877                         ret = process_inode_item(eb, i, &key, active_node);
1878                         break;
1879                 case BTRFS_EXTENT_DATA_KEY:
1880                         ret = process_file_extent(root, eb, i, &key,
1881                                                   active_node);
1882                         break;
1883                 default:
1884                         break;
1885                 };
1886         }
1887         return ret;
1888 }
1889
1890 static void reada_walk_down(struct btrfs_root *root,
1891                             struct extent_buffer *node, int slot)
1892 {
1893         u64 bytenr;
1894         u64 ptr_gen;
1895         u32 nritems;
1896         u32 blocksize;
1897         int i;
1898         int level;
1899
1900         level = btrfs_header_level(node);
1901         if (level != 1)
1902                 return;
1903
1904         nritems = btrfs_header_nritems(node);
1905         blocksize = root->nodesize;
1906         for (i = slot; i < nritems; i++) {
1907                 bytenr = btrfs_node_blockptr(node, i);
1908                 ptr_gen = btrfs_node_ptr_generation(node, i);
1909                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1910         }
1911 }
1912
1913 /*
1914  * Check the child node/leaf by the following condition:
1915  * 1. the first item key of the node/leaf should be the same with the one
1916  *    in parent.
1917  * 2. block in parent node should match the child node/leaf.
1918  * 3. generation of parent node and child's header should be consistent.
1919  *
1920  * Or the child node/leaf pointed by the key in parent is not valid.
1921  *
1922  * We hope to check leaf owner too, but since subvol may share leaves,
1923  * which makes leaf owner check not so strong, key check should be
1924  * sufficient enough for that case.
1925  */
1926 static int check_child_node(struct btrfs_root *root,
1927                             struct extent_buffer *parent, int slot,
1928                             struct extent_buffer *child)
1929 {
1930         struct btrfs_key parent_key;
1931         struct btrfs_key child_key;
1932         int ret = 0;
1933
1934         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1935         if (btrfs_header_level(child) == 0)
1936                 btrfs_item_key_to_cpu(child, &child_key, 0);
1937         else
1938                 btrfs_node_key_to_cpu(child, &child_key, 0);
1939
1940         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1941                 ret = -EINVAL;
1942                 fprintf(stderr,
1943                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1944                         parent_key.objectid, parent_key.type, parent_key.offset,
1945                         child_key.objectid, child_key.type, child_key.offset);
1946         }
1947         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1948                 ret = -EINVAL;
1949                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1950                         btrfs_node_blockptr(parent, slot),
1951                         btrfs_header_bytenr(child));
1952         }
1953         if (btrfs_node_ptr_generation(parent, slot) !=
1954             btrfs_header_generation(child)) {
1955                 ret = -EINVAL;
1956                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1957                         btrfs_header_generation(child),
1958                         btrfs_node_ptr_generation(parent, slot));
1959         }
1960         return ret;
1961 }
1962
1963 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1964                           struct walk_control *wc, int *level)
1965 {
1966         enum btrfs_tree_block_status status;
1967         u64 bytenr;
1968         u64 ptr_gen;
1969         struct extent_buffer *next;
1970         struct extent_buffer *cur;
1971         u32 blocksize;
1972         int ret, err = 0;
1973         u64 refs;
1974
1975         WARN_ON(*level < 0);
1976         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1977         ret = btrfs_lookup_extent_info(NULL, root,
1978                                        path->nodes[*level]->start,
1979                                        *level, 1, &refs, NULL);
1980         if (ret < 0) {
1981                 err = ret;
1982                 goto out;
1983         }
1984
1985         if (refs > 1) {
1986                 ret = enter_shared_node(root, path->nodes[*level]->start,
1987                                         refs, wc, *level);
1988                 if (ret > 0) {
1989                         err = ret;
1990                         goto out;
1991                 }
1992         }
1993
1994         while (*level >= 0) {
1995                 WARN_ON(*level < 0);
1996                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1997                 cur = path->nodes[*level];
1998
1999                 if (btrfs_header_level(cur) != *level)
2000                         WARN_ON(1);
2001
2002                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2003                         break;
2004                 if (*level == 0) {
2005                         ret = process_one_leaf(root, cur, wc);
2006                         if (ret < 0)
2007                                 err = ret;
2008                         break;
2009                 }
2010                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2011                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2012                 blocksize = root->nodesize;
2013                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
2014                                                1, &refs, NULL);
2015                 if (ret < 0)
2016                         refs = 0;
2017
2018                 if (refs > 1) {
2019                         ret = enter_shared_node(root, bytenr, refs,
2020                                                 wc, *level - 1);
2021                         if (ret > 0) {
2022                                 path->slots[*level]++;
2023                                 continue;
2024                         }
2025                 }
2026
2027                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2028                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2029                         free_extent_buffer(next);
2030                         reada_walk_down(root, cur, path->slots[*level]);
2031                         next = read_tree_block(root, bytenr, blocksize,
2032                                                ptr_gen);
2033                         if (!extent_buffer_uptodate(next)) {
2034                                 struct btrfs_key node_key;
2035
2036                                 btrfs_node_key_to_cpu(path->nodes[*level],
2037                                                       &node_key,
2038                                                       path->slots[*level]);
2039                                 btrfs_add_corrupt_extent_record(root->fs_info,
2040                                                 &node_key,
2041                                                 path->nodes[*level]->start,
2042                                                 root->nodesize, *level);
2043                                 err = -EIO;
2044                                 goto out;
2045                         }
2046                 }
2047
2048                 ret = check_child_node(root, cur, path->slots[*level], next);
2049                 if (ret) {
2050                         err = ret;
2051                         goto out;
2052                 }
2053
2054                 if (btrfs_is_leaf(next))
2055                         status = btrfs_check_leaf(root, NULL, next);
2056                 else
2057                         status = btrfs_check_node(root, NULL, next);
2058                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2059                         free_extent_buffer(next);
2060                         err = -EIO;
2061                         goto out;
2062                 }
2063
2064                 *level = *level - 1;
2065                 free_extent_buffer(path->nodes[*level]);
2066                 path->nodes[*level] = next;
2067                 path->slots[*level] = 0;
2068         }
2069 out:
2070         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2071         return err;
2072 }
2073
2074 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2075                         struct walk_control *wc, int *level)
2076 {
2077         int i;
2078         struct extent_buffer *leaf;
2079
2080         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2081                 leaf = path->nodes[i];
2082                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2083                         path->slots[i]++;
2084                         *level = i;
2085                         return 0;
2086                 } else {
2087                         free_extent_buffer(path->nodes[*level]);
2088                         path->nodes[*level] = NULL;
2089                         BUG_ON(*level > wc->active_node);
2090                         if (*level == wc->active_node)
2091                                 leave_shared_node(root, wc, *level);
2092                         *level = i + 1;
2093                 }
2094         }
2095         return 1;
2096 }
2097
2098 static int check_root_dir(struct inode_record *rec)
2099 {
2100         struct inode_backref *backref;
2101         int ret = -1;
2102
2103         if (!rec->found_inode_item || rec->errors)
2104                 goto out;
2105         if (rec->nlink != 1 || rec->found_link != 0)
2106                 goto out;
2107         if (list_empty(&rec->backrefs))
2108                 goto out;
2109         backref = to_inode_backref(rec->backrefs.next);
2110         if (!backref->found_inode_ref)
2111                 goto out;
2112         if (backref->index != 0 || backref->namelen != 2 ||
2113             memcmp(backref->name, "..", 2))
2114                 goto out;
2115         if (backref->found_dir_index || backref->found_dir_item)
2116                 goto out;
2117         ret = 0;
2118 out:
2119         return ret;
2120 }
2121
2122 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2123                               struct btrfs_root *root, struct btrfs_path *path,
2124                               struct inode_record *rec)
2125 {
2126         struct btrfs_inode_item *ei;
2127         struct btrfs_key key;
2128         int ret;
2129
2130         key.objectid = rec->ino;
2131         key.type = BTRFS_INODE_ITEM_KEY;
2132         key.offset = (u64)-1;
2133
2134         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2135         if (ret < 0)
2136                 goto out;
2137         if (ret) {
2138                 if (!path->slots[0]) {
2139                         ret = -ENOENT;
2140                         goto out;
2141                 }
2142                 path->slots[0]--;
2143                 ret = 0;
2144         }
2145         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2146         if (key.objectid != rec->ino) {
2147                 ret = -ENOENT;
2148                 goto out;
2149         }
2150
2151         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2152                             struct btrfs_inode_item);
2153         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2154         btrfs_mark_buffer_dirty(path->nodes[0]);
2155         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2156         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2157                root->root_key.objectid);
2158 out:
2159         btrfs_release_path(path);
2160         return ret;
2161 }
2162
2163 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2164                                     struct btrfs_root *root,
2165                                     struct btrfs_path *path,
2166                                     struct inode_record *rec)
2167 {
2168         int ret;
2169
2170         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2171         btrfs_release_path(path);
2172         if (!ret)
2173                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2174         return ret;
2175 }
2176
2177 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2178                                struct btrfs_root *root,
2179                                struct btrfs_path *path,
2180                                struct inode_record *rec)
2181 {
2182         struct btrfs_inode_item *ei;
2183         struct btrfs_key key;
2184         int ret = 0;
2185
2186         key.objectid = rec->ino;
2187         key.type = BTRFS_INODE_ITEM_KEY;
2188         key.offset = 0;
2189
2190         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2191         if (ret) {
2192                 if (ret > 0)
2193                         ret = -ENOENT;
2194                 goto out;
2195         }
2196
2197         /* Since ret == 0, no need to check anything */
2198         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2199                             struct btrfs_inode_item);
2200         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2201         btrfs_mark_buffer_dirty(path->nodes[0]);
2202         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2203         printf("reset nbytes for ino %llu root %llu\n",
2204                rec->ino, root->root_key.objectid);
2205 out:
2206         btrfs_release_path(path);
2207         return ret;
2208 }
2209
2210 static int add_missing_dir_index(struct btrfs_root *root,
2211                                  struct cache_tree *inode_cache,
2212                                  struct inode_record *rec,
2213                                  struct inode_backref *backref)
2214 {
2215         struct btrfs_path *path;
2216         struct btrfs_trans_handle *trans;
2217         struct btrfs_dir_item *dir_item;
2218         struct extent_buffer *leaf;
2219         struct btrfs_key key;
2220         struct btrfs_disk_key disk_key;
2221         struct inode_record *dir_rec;
2222         unsigned long name_ptr;
2223         u32 data_size = sizeof(*dir_item) + backref->namelen;
2224         int ret;
2225
2226         path = btrfs_alloc_path();
2227         if (!path)
2228                 return -ENOMEM;
2229
2230         trans = btrfs_start_transaction(root, 1);
2231         if (IS_ERR(trans)) {
2232                 btrfs_free_path(path);
2233                 return PTR_ERR(trans);
2234         }
2235
2236         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2237                 (unsigned long long)rec->ino);
2238         key.objectid = backref->dir;
2239         key.type = BTRFS_DIR_INDEX_KEY;
2240         key.offset = backref->index;
2241
2242         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2243         BUG_ON(ret);
2244
2245         leaf = path->nodes[0];
2246         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2247
2248         disk_key.objectid = cpu_to_le64(rec->ino);
2249         disk_key.type = BTRFS_INODE_ITEM_KEY;
2250         disk_key.offset = 0;
2251
2252         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2253         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2254         btrfs_set_dir_data_len(leaf, dir_item, 0);
2255         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2256         name_ptr = (unsigned long)(dir_item + 1);
2257         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2258         btrfs_mark_buffer_dirty(leaf);
2259         btrfs_free_path(path);
2260         btrfs_commit_transaction(trans, root);
2261
2262         backref->found_dir_index = 1;
2263         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2264         BUG_ON(IS_ERR(dir_rec));
2265         if (!dir_rec)
2266                 return 0;
2267         dir_rec->found_size += backref->namelen;
2268         if (dir_rec->found_size == dir_rec->isize &&
2269             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2270                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2271         if (dir_rec->found_size != dir_rec->isize)
2272                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2273
2274         return 0;
2275 }
2276
2277 static int delete_dir_index(struct btrfs_root *root,
2278                             struct cache_tree *inode_cache,
2279                             struct inode_record *rec,
2280                             struct inode_backref *backref)
2281 {
2282         struct btrfs_trans_handle *trans;
2283         struct btrfs_dir_item *di;
2284         struct btrfs_path *path;
2285         int ret = 0;
2286
2287         path = btrfs_alloc_path();
2288         if (!path)
2289                 return -ENOMEM;
2290
2291         trans = btrfs_start_transaction(root, 1);
2292         if (IS_ERR(trans)) {
2293                 btrfs_free_path(path);
2294                 return PTR_ERR(trans);
2295         }
2296
2297
2298         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2299                 (unsigned long long)backref->dir,
2300                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2301                 (unsigned long long)root->objectid);
2302
2303         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2304                                     backref->name, backref->namelen,
2305                                     backref->index, -1);
2306         if (IS_ERR(di)) {
2307                 ret = PTR_ERR(di);
2308                 btrfs_free_path(path);
2309                 btrfs_commit_transaction(trans, root);
2310                 if (ret == -ENOENT)
2311                         return 0;
2312                 return ret;
2313         }
2314
2315         if (!di)
2316                 ret = btrfs_del_item(trans, root, path);
2317         else
2318                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2319         BUG_ON(ret);
2320         btrfs_free_path(path);
2321         btrfs_commit_transaction(trans, root);
2322         return ret;
2323 }
2324
2325 static int create_inode_item(struct btrfs_root *root,
2326                              struct inode_record *rec,
2327                              struct inode_backref *backref, int root_dir)
2328 {
2329         struct btrfs_trans_handle *trans;
2330         struct btrfs_inode_item inode_item;
2331         time_t now = time(NULL);
2332         int ret;
2333
2334         trans = btrfs_start_transaction(root, 1);
2335         if (IS_ERR(trans)) {
2336                 ret = PTR_ERR(trans);
2337                 return ret;
2338         }
2339
2340         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2341                 "be incomplete, please check permissions and content after "
2342                 "the fsck completes.\n", (unsigned long long)root->objectid,
2343                 (unsigned long long)rec->ino);
2344
2345         memset(&inode_item, 0, sizeof(inode_item));
2346         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2347         if (root_dir)
2348                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2349         else
2350                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2351         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2352         if (rec->found_dir_item) {
2353                 if (rec->found_file_extent)
2354                         fprintf(stderr, "root %llu inode %llu has both a dir "
2355                                 "item and extents, unsure if it is a dir or a "
2356                                 "regular file so setting it as a directory\n",
2357                                 (unsigned long long)root->objectid,
2358                                 (unsigned long long)rec->ino);
2359                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2360                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2361         } else if (!rec->found_dir_item) {
2362                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2363                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2364         }
2365         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2366         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2367         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2368         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2369         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2370         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2371         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2372         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2373
2374         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2375         BUG_ON(ret);
2376         btrfs_commit_transaction(trans, root);
2377         return 0;
2378 }
2379
2380 static int repair_inode_backrefs(struct btrfs_root *root,
2381                                  struct inode_record *rec,
2382                                  struct cache_tree *inode_cache,
2383                                  int delete)
2384 {
2385         struct inode_backref *tmp, *backref;
2386         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2387         int ret = 0;
2388         int repaired = 0;
2389
2390         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2391                 if (!delete && rec->ino == root_dirid) {
2392                         if (!rec->found_inode_item) {
2393                                 ret = create_inode_item(root, rec, backref, 1);
2394                                 if (ret)
2395                                         break;
2396                                 repaired++;
2397                         }
2398                 }
2399
2400                 /* Index 0 for root dir's are special, don't mess with it */
2401                 if (rec->ino == root_dirid && backref->index == 0)
2402                         continue;
2403
2404                 if (delete &&
2405                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2406                      (backref->found_dir_index && backref->found_inode_ref &&
2407                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2408                         ret = delete_dir_index(root, inode_cache, rec, backref);
2409                         if (ret)
2410                                 break;
2411                         repaired++;
2412                         list_del(&backref->list);
2413                         free(backref);
2414                 }
2415
2416                 if (!delete && !backref->found_dir_index &&
2417                     backref->found_dir_item && backref->found_inode_ref) {
2418                         ret = add_missing_dir_index(root, inode_cache, rec,
2419                                                     backref);
2420                         if (ret)
2421                                 break;
2422                         repaired++;
2423                         if (backref->found_dir_item &&
2424                             backref->found_dir_index &&
2425                             backref->found_dir_index) {
2426                                 if (!backref->errors &&
2427                                     backref->found_inode_ref) {
2428                                         list_del(&backref->list);
2429                                         free(backref);
2430                                 }
2431                         }
2432                 }
2433
2434                 if (!delete && (!backref->found_dir_index &&
2435                                 !backref->found_dir_item &&
2436                                 backref->found_inode_ref)) {
2437                         struct btrfs_trans_handle *trans;
2438                         struct btrfs_key location;
2439
2440                         ret = check_dir_conflict(root, backref->name,
2441                                                  backref->namelen,
2442                                                  backref->dir,
2443                                                  backref->index);
2444                         if (ret) {
2445                                 /*
2446                                  * let nlink fixing routine to handle it,
2447                                  * which can do it better.
2448                                  */
2449                                 ret = 0;
2450                                 break;
2451                         }
2452                         location.objectid = rec->ino;
2453                         location.type = BTRFS_INODE_ITEM_KEY;
2454                         location.offset = 0;
2455
2456                         trans = btrfs_start_transaction(root, 1);
2457                         if (IS_ERR(trans)) {
2458                                 ret = PTR_ERR(trans);
2459                                 break;
2460                         }
2461                         fprintf(stderr, "adding missing dir index/item pair "
2462                                 "for inode %llu\n",
2463                                 (unsigned long long)rec->ino);
2464                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2465                                                     backref->namelen,
2466                                                     backref->dir, &location,
2467                                                     imode_to_type(rec->imode),
2468                                                     backref->index);
2469                         BUG_ON(ret);
2470                         btrfs_commit_transaction(trans, root);
2471                         repaired++;
2472                 }
2473
2474                 if (!delete && (backref->found_inode_ref &&
2475                                 backref->found_dir_index &&
2476                                 backref->found_dir_item &&
2477                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2478                                 !rec->found_inode_item)) {
2479                         ret = create_inode_item(root, rec, backref, 0);
2480                         if (ret)
2481                                 break;
2482                         repaired++;
2483                 }
2484
2485         }
2486         return ret ? ret : repaired;
2487 }
2488
2489 /*
2490  * To determine the file type for nlink/inode_item repair
2491  *
2492  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2493  * Return -ENOENT if file type is not found.
2494  */
2495 static int find_file_type(struct inode_record *rec, u8 *type)
2496 {
2497         struct inode_backref *backref;
2498
2499         /* For inode item recovered case */
2500         if (rec->found_inode_item) {
2501                 *type = imode_to_type(rec->imode);
2502                 return 0;
2503         }
2504
2505         list_for_each_entry(backref, &rec->backrefs, list) {
2506                 if (backref->found_dir_index || backref->found_dir_item) {
2507                         *type = backref->filetype;
2508                         return 0;
2509                 }
2510         }
2511         return -ENOENT;
2512 }
2513
2514 /*
2515  * To determine the file name for nlink repair
2516  *
2517  * Return 0 if file name is found, set name and namelen.
2518  * Return -ENOENT if file name is not found.
2519  */
2520 static int find_file_name(struct inode_record *rec,
2521                           char *name, int *namelen)
2522 {
2523         struct inode_backref *backref;
2524
2525         list_for_each_entry(backref, &rec->backrefs, list) {
2526                 if (backref->found_dir_index || backref->found_dir_item ||
2527                     backref->found_inode_ref) {
2528                         memcpy(name, backref->name, backref->namelen);
2529                         *namelen = backref->namelen;
2530                         return 0;
2531                 }
2532         }
2533         return -ENOENT;
2534 }
2535
2536 /* Reset the nlink of the inode to the correct one */
2537 static int reset_nlink(struct btrfs_trans_handle *trans,
2538                        struct btrfs_root *root,
2539                        struct btrfs_path *path,
2540                        struct inode_record *rec)
2541 {
2542         struct inode_backref *backref;
2543         struct inode_backref *tmp;
2544         struct btrfs_key key;
2545         struct btrfs_inode_item *inode_item;
2546         int ret = 0;
2547
2548         /* We don't believe this either, reset it and iterate backref */
2549         rec->found_link = 0;
2550
2551         /* Remove all backref including the valid ones */
2552         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2553                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2554                                    backref->index, backref->name,
2555                                    backref->namelen, 0);
2556                 if (ret < 0)
2557                         goto out;
2558
2559                 /* remove invalid backref, so it won't be added back */
2560                 if (!(backref->found_dir_index &&
2561                       backref->found_dir_item &&
2562                       backref->found_inode_ref)) {
2563                         list_del(&backref->list);
2564                         free(backref);
2565                 } else {
2566                         rec->found_link++;
2567                 }
2568         }
2569
2570         /* Set nlink to 0 */
2571         key.objectid = rec->ino;
2572         key.type = BTRFS_INODE_ITEM_KEY;
2573         key.offset = 0;
2574         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2575         if (ret < 0)
2576                 goto out;
2577         if (ret > 0) {
2578                 ret = -ENOENT;
2579                 goto out;
2580         }
2581         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582                                     struct btrfs_inode_item);
2583         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2584         btrfs_mark_buffer_dirty(path->nodes[0]);
2585         btrfs_release_path(path);
2586
2587         /*
2588          * Add back valid inode_ref/dir_item/dir_index,
2589          * add_link() will handle the nlink inc, so new nlink must be correct
2590          */
2591         list_for_each_entry(backref, &rec->backrefs, list) {
2592                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2593                                      backref->name, backref->namelen,
2594                                      backref->filetype, &backref->index, 1);
2595                 if (ret < 0)
2596                         goto out;
2597         }
2598 out:
2599         btrfs_release_path(path);
2600         return ret;
2601 }
2602
2603 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2604                                struct btrfs_root *root,
2605                                struct btrfs_path *path,
2606                                struct inode_record *rec)
2607 {
2608         char *dir_name = "lost+found";
2609         char namebuf[BTRFS_NAME_LEN] = {0};
2610         u64 lost_found_ino;
2611         u32 mode = 0700;
2612         u8 type = 0;
2613         int namelen = 0;
2614         int name_recovered = 0;
2615         int type_recovered = 0;
2616         int ret = 0;
2617
2618         /*
2619          * Get file name and type first before these invalid inode ref
2620          * are deleted by remove_all_invalid_backref()
2621          */
2622         name_recovered = !find_file_name(rec, namebuf, &namelen);
2623         type_recovered = !find_file_type(rec, &type);
2624
2625         if (!name_recovered) {
2626                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2627                        rec->ino, rec->ino);
2628                 namelen = count_digits(rec->ino);
2629                 sprintf(namebuf, "%llu", rec->ino);
2630                 name_recovered = 1;
2631         }
2632         if (!type_recovered) {
2633                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2634                        rec->ino);
2635                 type = BTRFS_FT_REG_FILE;
2636                 type_recovered = 1;
2637         }
2638
2639         ret = reset_nlink(trans, root, path, rec);
2640         if (ret < 0) {
2641                 fprintf(stderr,
2642                         "Failed to reset nlink for inode %llu: %s\n",
2643                         rec->ino, strerror(-ret));
2644                 goto out;
2645         }
2646
2647         if (rec->found_link == 0) {
2648                 lost_found_ino = root->highest_inode;
2649                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2650                         ret = -EOVERFLOW;
2651                         goto out;
2652                 }
2653                 lost_found_ino++;
2654                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2655                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2656                                   mode);
2657                 if (ret < 0) {
2658                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2659                                 dir_name, strerror(-ret));
2660                         goto out;
2661                 }
2662                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2663                                      namebuf, namelen, type, NULL, 1);
2664                 /*
2665                  * Add ".INO" suffix several times to handle case where
2666                  * "FILENAME.INO" is already taken by another file.
2667                  */
2668                 while (ret == -EEXIST) {
2669                         /*
2670                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2671                          */
2672                         if (namelen + count_digits(rec->ino) + 1 >
2673                             BTRFS_NAME_LEN) {
2674                                 ret = -EFBIG;
2675                                 goto out;
2676                         }
2677                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2678                                  ".%llu", rec->ino);
2679                         namelen += count_digits(rec->ino) + 1;
2680                         ret = btrfs_add_link(trans, root, rec->ino,
2681                                              lost_found_ino, namebuf,
2682                                              namelen, type, NULL, 1);
2683                 }
2684                 if (ret < 0) {
2685                         fprintf(stderr,
2686                                 "Failed to link the inode %llu to %s dir: %s\n",
2687                                 rec->ino, dir_name, strerror(-ret));
2688                         goto out;
2689                 }
2690                 /*
2691                  * Just increase the found_link, don't actually add the
2692                  * backref. This will make things easier and this inode
2693                  * record will be freed after the repair is done.
2694                  * So fsck will not report problem about this inode.
2695                  */
2696                 rec->found_link++;
2697                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2698                        namelen, namebuf, dir_name);
2699         }
2700         printf("Fixed the nlink of inode %llu\n", rec->ino);
2701 out:
2702         /*
2703          * Clear the flag anyway, or we will loop forever for the same inode
2704          * as it will not be removed from the bad inode list and the dead loop
2705          * happens.
2706          */
2707         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2708         btrfs_release_path(path);
2709         return ret;
2710 }
2711
2712 /*
2713  * Check if there is any normal(reg or prealloc) file extent for given
2714  * ino.
2715  * This is used to determine the file type when neither its dir_index/item or
2716  * inode_item exists.
2717  *
2718  * This will *NOT* report error, if any error happens, just consider it does
2719  * not have any normal file extent.
2720  */
2721 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2722 {
2723         struct btrfs_path *path;
2724         struct btrfs_key key;
2725         struct btrfs_key found_key;
2726         struct btrfs_file_extent_item *fi;
2727         u8 type;
2728         int ret = 0;
2729
2730         path = btrfs_alloc_path();
2731         if (!path)
2732                 goto out;
2733         key.objectid = ino;
2734         key.type = BTRFS_EXTENT_DATA_KEY;
2735         key.offset = 0;
2736
2737         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2738         if (ret < 0) {
2739                 ret = 0;
2740                 goto out;
2741         }
2742         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2743                 ret = btrfs_next_leaf(root, path);
2744                 if (ret) {
2745                         ret = 0;
2746                         goto out;
2747                 }
2748         }
2749         while (1) {
2750                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2751                                       path->slots[0]);
2752                 if (found_key.objectid != ino ||
2753                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2754                         break;
2755                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2756                                     struct btrfs_file_extent_item);
2757                 type = btrfs_file_extent_type(path->nodes[0], fi);
2758                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2759                         ret = 1;
2760                         goto out;
2761                 }
2762         }
2763 out:
2764         btrfs_free_path(path);
2765         return ret;
2766 }
2767
2768 static u32 btrfs_type_to_imode(u8 type)
2769 {
2770         static u32 imode_by_btrfs_type[] = {
2771                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2772                 [BTRFS_FT_DIR]          = S_IFDIR,
2773                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2774                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2775                 [BTRFS_FT_FIFO]         = S_IFIFO,
2776                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2777                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2778         };
2779
2780         return imode_by_btrfs_type[(type)];
2781 }
2782
2783 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2784                                 struct btrfs_root *root,
2785                                 struct btrfs_path *path,
2786                                 struct inode_record *rec)
2787 {
2788         u8 filetype;
2789         u32 mode = 0700;
2790         int type_recovered = 0;
2791         int ret = 0;
2792
2793         printf("Trying to rebuild inode:%llu\n", rec->ino);
2794
2795         type_recovered = !find_file_type(rec, &filetype);
2796
2797         /*
2798          * Try to determine inode type if type not found.
2799          *
2800          * For found regular file extent, it must be FILE.
2801          * For found dir_item/index, it must be DIR.
2802          *
2803          * For undetermined one, use FILE as fallback.
2804          *
2805          * TODO:
2806          * 1. If found backref(inode_index/item is already handled) to it,
2807          *    it must be DIR.
2808          *    Need new inode-inode ref structure to allow search for that.
2809          */
2810         if (!type_recovered) {
2811                 if (rec->found_file_extent &&
2812                     find_normal_file_extent(root, rec->ino)) {
2813                         type_recovered = 1;
2814                         filetype = BTRFS_FT_REG_FILE;
2815                 } else if (rec->found_dir_item) {
2816                         type_recovered = 1;
2817                         filetype = BTRFS_FT_DIR;
2818                 } else if (!list_empty(&rec->orphan_extents)) {
2819                         type_recovered = 1;
2820                         filetype = BTRFS_FT_REG_FILE;
2821                 } else{
2822                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2823                                rec->ino);
2824                         type_recovered = 1;
2825                         filetype = BTRFS_FT_REG_FILE;
2826                 }
2827         }
2828
2829         ret = btrfs_new_inode(trans, root, rec->ino,
2830                               mode | btrfs_type_to_imode(filetype));
2831         if (ret < 0)
2832                 goto out;
2833
2834         /*
2835          * Here inode rebuild is done, we only rebuild the inode item,
2836          * don't repair the nlink(like move to lost+found).
2837          * That is the job of nlink repair.
2838          *
2839          * We just fill the record and return
2840          */
2841         rec->found_dir_item = 1;
2842         rec->imode = mode | btrfs_type_to_imode(filetype);
2843         rec->nlink = 0;
2844         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2845         /* Ensure the inode_nlinks repair function will be called */
2846         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2847 out:
2848         return ret;
2849 }
2850
2851 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2852                                       struct btrfs_root *root,
2853                                       struct btrfs_path *path,
2854                                       struct inode_record *rec)
2855 {
2856         struct orphan_data_extent *orphan;
2857         struct orphan_data_extent *tmp;
2858         int ret = 0;
2859
2860         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2861                 /*
2862                  * Check for conflicting file extents
2863                  *
2864                  * Here we don't know whether the extents is compressed or not,
2865                  * so we can only assume it not compressed nor data offset,
2866                  * and use its disk_len as extent length.
2867                  */
2868                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2869                                        orphan->offset, orphan->disk_len, 0);
2870                 btrfs_release_path(path);
2871                 if (ret < 0)
2872                         goto out;
2873                 if (!ret) {
2874                         fprintf(stderr,
2875                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2876                                 orphan->disk_bytenr, orphan->disk_len);
2877                         ret = btrfs_free_extent(trans,
2878                                         root->fs_info->extent_root,
2879                                         orphan->disk_bytenr, orphan->disk_len,
2880                                         0, root->objectid, orphan->objectid,
2881                                         orphan->offset);
2882                         if (ret < 0)
2883                                 goto out;
2884                 }
2885                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2886                                 orphan->offset, orphan->disk_bytenr,
2887                                 orphan->disk_len, orphan->disk_len);
2888                 if (ret < 0)
2889                         goto out;
2890
2891                 /* Update file size info */
2892                 rec->found_size += orphan->disk_len;
2893                 if (rec->found_size == rec->nbytes)
2894                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2895
2896                 /* Update the file extent hole info too */
2897                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2898                                            orphan->disk_len);
2899                 if (ret < 0)
2900                         goto out;
2901                 if (RB_EMPTY_ROOT(&rec->holes))
2902                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2903
2904                 list_del(&orphan->list);
2905                 free(orphan);
2906         }
2907         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2908 out:
2909         return ret;
2910 }
2911
2912 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2913                                         struct btrfs_root *root,
2914                                         struct btrfs_path *path,
2915                                         struct inode_record *rec)
2916 {
2917         struct rb_node *node;
2918         struct file_extent_hole *hole;
2919         int found = 0;
2920         int ret = 0;
2921
2922         node = rb_first(&rec->holes);
2923
2924         while (node) {
2925                 found = 1;
2926                 hole = rb_entry(node, struct file_extent_hole, node);
2927                 ret = btrfs_punch_hole(trans, root, rec->ino,
2928                                        hole->start, hole->len);
2929                 if (ret < 0)
2930                         goto out;
2931                 ret = del_file_extent_hole(&rec->holes, hole->start,
2932                                            hole->len);
2933                 if (ret < 0)
2934                         goto out;
2935                 if (RB_EMPTY_ROOT(&rec->holes))
2936                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2937                 node = rb_first(&rec->holes);
2938         }
2939         /* special case for a file losing all its file extent */
2940         if (!found) {
2941                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2942                                        round_up(rec->isize, root->sectorsize));
2943                 if (ret < 0)
2944                         goto out;
2945         }
2946         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2947                rec->ino, root->objectid);
2948 out:
2949         return ret;
2950 }
2951
2952 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2953 {
2954         struct btrfs_trans_handle *trans;
2955         struct btrfs_path *path;
2956         int ret = 0;
2957
2958         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2959                              I_ERR_NO_ORPHAN_ITEM |
2960                              I_ERR_LINK_COUNT_WRONG |
2961                              I_ERR_NO_INODE_ITEM |
2962                              I_ERR_FILE_EXTENT_ORPHAN |
2963                              I_ERR_FILE_EXTENT_DISCOUNT|
2964                              I_ERR_FILE_NBYTES_WRONG)))
2965                 return rec->errors;
2966
2967         path = btrfs_alloc_path();
2968         if (!path)
2969                 return -ENOMEM;
2970
2971         /*
2972          * For nlink repair, it may create a dir and add link, so
2973          * 2 for parent(256)'s dir_index and dir_item
2974          * 2 for lost+found dir's inode_item and inode_ref
2975          * 1 for the new inode_ref of the file
2976          * 2 for lost+found dir's dir_index and dir_item for the file
2977          */
2978         trans = btrfs_start_transaction(root, 7);
2979         if (IS_ERR(trans)) {
2980                 btrfs_free_path(path);
2981                 return PTR_ERR(trans);
2982         }
2983
2984         if (rec->errors & I_ERR_NO_INODE_ITEM)
2985                 ret = repair_inode_no_item(trans, root, path, rec);
2986         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2987                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2988         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2989                 ret = repair_inode_discount_extent(trans, root, path, rec);
2990         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2991                 ret = repair_inode_isize(trans, root, path, rec);
2992         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2993                 ret = repair_inode_orphan_item(trans, root, path, rec);
2994         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2995                 ret = repair_inode_nlinks(trans, root, path, rec);
2996         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2997                 ret = repair_inode_nbytes(trans, root, path, rec);
2998         btrfs_commit_transaction(trans, root);
2999         btrfs_free_path(path);
3000         return ret;
3001 }
3002
3003 static int check_inode_recs(struct btrfs_root *root,
3004                             struct cache_tree *inode_cache)
3005 {
3006         struct cache_extent *cache;
3007         struct ptr_node *node;
3008         struct inode_record *rec;
3009         struct inode_backref *backref;
3010         int stage = 0;
3011         int ret = 0;
3012         int err = 0;
3013         u64 error = 0;
3014         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3015
3016         if (btrfs_root_refs(&root->root_item) == 0) {
3017                 if (!cache_tree_empty(inode_cache))
3018                         fprintf(stderr, "warning line %d\n", __LINE__);
3019                 return 0;
3020         }
3021
3022         /*
3023          * We need to record the highest inode number for later 'lost+found'
3024          * dir creation.
3025          * We must select an ino not used/referred by any existing inode, or
3026          * 'lost+found' ino may be a missing ino in a corrupted leaf,
3027          * this may cause 'lost+found' dir has wrong nlinks.
3028          */
3029         cache = last_cache_extent(inode_cache);
3030         if (cache) {
3031                 node = container_of(cache, struct ptr_node, cache);
3032                 rec = node->data;
3033                 if (rec->ino > root->highest_inode)
3034                         root->highest_inode = rec->ino;
3035         }
3036
3037         /*
3038          * We need to repair backrefs first because we could change some of the
3039          * errors in the inode recs.
3040          *
3041          * We also need to go through and delete invalid backrefs first and then
3042          * add the correct ones second.  We do this because we may get EEXIST
3043          * when adding back the correct index because we hadn't yet deleted the
3044          * invalid index.
3045          *
3046          * For example, if we were missing a dir index then the directories
3047          * isize would be wrong, so if we fixed the isize to what we thought it
3048          * would be and then fixed the backref we'd still have a invalid fs, so
3049          * we need to add back the dir index and then check to see if the isize
3050          * is still wrong.
3051          */
3052         while (stage < 3) {
3053                 stage++;
3054                 if (stage == 3 && !err)
3055                         break;
3056
3057                 cache = search_cache_extent(inode_cache, 0);
3058                 while (repair && cache) {
3059                         node = container_of(cache, struct ptr_node, cache);
3060                         rec = node->data;
3061                         cache = next_cache_extent(cache);
3062
3063                         /* Need to free everything up and rescan */
3064                         if (stage == 3) {
3065                                 remove_cache_extent(inode_cache, &node->cache);
3066                                 free(node);
3067                                 free_inode_rec(rec);
3068                                 continue;
3069                         }
3070
3071                         if (list_empty(&rec->backrefs))
3072                                 continue;
3073
3074                         ret = repair_inode_backrefs(root, rec, inode_cache,
3075                                                     stage == 1);
3076                         if (ret < 0) {
3077                                 err = ret;
3078                                 stage = 2;
3079                                 break;
3080                         } if (ret > 0) {
3081                                 err = -EAGAIN;
3082                         }
3083                 }
3084         }
3085         if (err)
3086                 return err;
3087
3088         rec = get_inode_rec(inode_cache, root_dirid, 0);
3089         BUG_ON(IS_ERR(rec));
3090         if (rec) {
3091                 ret = check_root_dir(rec);
3092                 if (ret) {
3093                         fprintf(stderr, "root %llu root dir %llu error\n",
3094                                 (unsigned long long)root->root_key.objectid,
3095                                 (unsigned long long)root_dirid);
3096                         print_inode_error(root, rec);
3097                         error++;
3098                 }
3099         } else {
3100                 if (repair) {
3101                         struct btrfs_trans_handle *trans;
3102
3103                         trans = btrfs_start_transaction(root, 1);
3104                         if (IS_ERR(trans)) {
3105                                 err = PTR_ERR(trans);
3106                                 return err;
3107                         }
3108
3109                         fprintf(stderr,
3110                                 "root %llu missing its root dir, recreating\n",
3111                                 (unsigned long long)root->objectid);
3112
3113                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3114                         BUG_ON(ret);
3115
3116                         btrfs_commit_transaction(trans, root);
3117                         return -EAGAIN;
3118                 }
3119
3120                 fprintf(stderr, "root %llu root dir %llu not found\n",
3121                         (unsigned long long)root->root_key.objectid,
3122                         (unsigned long long)root_dirid);
3123         }
3124
3125         while (1) {
3126                 cache = search_cache_extent(inode_cache, 0);
3127                 if (!cache)
3128                         break;
3129                 node = container_of(cache, struct ptr_node, cache);
3130                 rec = node->data;
3131                 remove_cache_extent(inode_cache, &node->cache);
3132                 free(node);
3133                 if (rec->ino == root_dirid ||
3134                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3135                         free_inode_rec(rec);
3136                         continue;
3137                 }
3138
3139                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3140                         ret = check_orphan_item(root, rec->ino);
3141                         if (ret == 0)
3142                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3143                         if (can_free_inode_rec(rec)) {
3144                                 free_inode_rec(rec);
3145                                 continue;
3146                         }
3147                 }
3148
3149                 if (!rec->found_inode_item)
3150                         rec->errors |= I_ERR_NO_INODE_ITEM;
3151                 if (rec->found_link != rec->nlink)
3152                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3153                 if (repair) {
3154                         ret = try_repair_inode(root, rec);
3155                         if (ret == 0 && can_free_inode_rec(rec)) {
3156                                 free_inode_rec(rec);
3157                                 continue;
3158                         }
3159                         ret = 0;
3160                 }
3161
3162                 if (!(repair && ret == 0))
3163                         error++;
3164                 print_inode_error(root, rec);
3165                 list_for_each_entry(backref, &rec->backrefs, list) {
3166                         if (!backref->found_dir_item)
3167                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3168                         if (!backref->found_dir_index)
3169                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3170                         if (!backref->found_inode_ref)
3171                                 backref->errors |= REF_ERR_NO_INODE_REF;
3172                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3173                                 " namelen %u name %s filetype %d errors %x",
3174                                 (unsigned long long)backref->dir,
3175                                 (unsigned long long)backref->index,
3176                                 backref->namelen, backref->name,
3177                                 backref->filetype, backref->errors);
3178                         print_ref_error(backref->errors);
3179                 }
3180                 free_inode_rec(rec);
3181         }
3182         return (error > 0) ? -1 : 0;
3183 }
3184
3185 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3186                                         u64 objectid)
3187 {
3188         struct cache_extent *cache;
3189         struct root_record *rec = NULL;
3190         int ret;
3191
3192         cache = lookup_cache_extent(root_cache, objectid, 1);
3193         if (cache) {
3194                 rec = container_of(cache, struct root_record, cache);
3195         } else {
3196                 rec = calloc(1, sizeof(*rec));
3197                 if (!rec)
3198                         return ERR_PTR(-ENOMEM);
3199                 rec->objectid = objectid;
3200                 INIT_LIST_HEAD(&rec->backrefs);
3201                 rec->cache.start = objectid;
3202                 rec->cache.size = 1;
3203
3204                 ret = insert_cache_extent(root_cache, &rec->cache);
3205                 if (ret)
3206                         return ERR_PTR(-EEXIST);
3207         }
3208         return rec;
3209 }
3210
3211 static struct root_backref *get_root_backref(struct root_record *rec,
3212                                              u64 ref_root, u64 dir, u64 index,
3213                                              const char *name, int namelen)
3214 {
3215         struct root_backref *backref;
3216
3217         list_for_each_entry(backref, &rec->backrefs, list) {
3218                 if (backref->ref_root != ref_root || backref->dir != dir ||
3219                     backref->namelen != namelen)
3220                         continue;
3221                 if (memcmp(name, backref->name, namelen))
3222                         continue;
3223                 return backref;
3224         }
3225
3226         backref = calloc(1, sizeof(*backref) + namelen + 1);
3227         if (!backref)
3228                 return NULL;
3229         backref->ref_root = ref_root;
3230         backref->dir = dir;
3231         backref->index = index;
3232         backref->namelen = namelen;
3233         memcpy(backref->name, name, namelen);
3234         backref->name[namelen] = '\0';
3235         list_add_tail(&backref->list, &rec->backrefs);
3236         return backref;
3237 }
3238
3239 static void free_root_record(struct cache_extent *cache)
3240 {
3241         struct root_record *rec;
3242         struct root_backref *backref;
3243
3244         rec = container_of(cache, struct root_record, cache);
3245         while (!list_empty(&rec->backrefs)) {
3246                 backref = to_root_backref(rec->backrefs.next);
3247                 list_del(&backref->list);
3248                 free(backref);
3249         }
3250
3251         kfree(rec);
3252 }
3253
3254 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3255
3256 static int add_root_backref(struct cache_tree *root_cache,
3257                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3258                             const char *name, int namelen,
3259                             int item_type, int errors)
3260 {
3261         struct root_record *rec;
3262         struct root_backref *backref;
3263
3264         rec = get_root_rec(root_cache, root_id);
3265         BUG_ON(IS_ERR(rec));
3266         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3267         BUG_ON(!backref);
3268
3269         backref->errors |= errors;
3270
3271         if (item_type != BTRFS_DIR_ITEM_KEY) {
3272                 if (backref->found_dir_index || backref->found_back_ref ||
3273                     backref->found_forward_ref) {
3274                         if (backref->index != index)
3275                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3276                 } else {
3277                         backref->index = index;
3278                 }
3279         }
3280
3281         if (item_type == BTRFS_DIR_ITEM_KEY) {
3282                 if (backref->found_forward_ref)
3283                         rec->found_ref++;
3284                 backref->found_dir_item = 1;
3285         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3286                 backref->found_dir_index = 1;
3287         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3288                 if (backref->found_forward_ref)
3289                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3290                 else if (backref->found_dir_item)
3291                         rec->found_ref++;
3292                 backref->found_forward_ref = 1;
3293         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3294                 if (backref->found_back_ref)
3295                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3296                 backref->found_back_ref = 1;
3297         } else {
3298                 BUG_ON(1);
3299         }
3300
3301         if (backref->found_forward_ref && backref->found_dir_item)
3302                 backref->reachable = 1;
3303         return 0;
3304 }
3305
3306 static int merge_root_recs(struct btrfs_root *root,
3307                            struct cache_tree *src_cache,
3308                            struct cache_tree *dst_cache)
3309 {
3310         struct cache_extent *cache;
3311         struct ptr_node *node;
3312         struct inode_record *rec;
3313         struct inode_backref *backref;
3314         int ret = 0;
3315
3316         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3317                 free_inode_recs_tree(src_cache);
3318                 return 0;
3319         }
3320
3321         while (1) {
3322                 cache = search_cache_extent(src_cache, 0);
3323                 if (!cache)
3324                         break;
3325                 node = container_of(cache, struct ptr_node, cache);
3326                 rec = node->data;
3327                 remove_cache_extent(src_cache, &node->cache);
3328                 free(node);
3329
3330                 ret = is_child_root(root, root->objectid, rec->ino);
3331                 if (ret < 0)
3332                         break;
3333                 else if (ret == 0)
3334                         goto skip;
3335
3336                 list_for_each_entry(backref, &rec->backrefs, list) {
3337                         BUG_ON(backref->found_inode_ref);
3338                         if (backref->found_dir_item)
3339                                 add_root_backref(dst_cache, rec->ino,
3340                                         root->root_key.objectid, backref->dir,
3341                                         backref->index, backref->name,
3342                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3343                                         backref->errors);
3344                         if (backref->found_dir_index)
3345                                 add_root_backref(dst_cache, rec->ino,
3346                                         root->root_key.objectid, backref->dir,
3347                                         backref->index, backref->name,
3348                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3349                                         backref->errors);
3350                 }
3351 skip:
3352                 free_inode_rec(rec);
3353         }
3354         if (ret < 0)
3355                 return ret;
3356         return 0;
3357 }
3358
3359 static int check_root_refs(struct btrfs_root *root,
3360                            struct cache_tree *root_cache)
3361 {
3362         struct root_record *rec;
3363         struct root_record *ref_root;
3364         struct root_backref *backref;
3365         struct cache_extent *cache;
3366         int loop = 1;
3367         int ret;
3368         int error;
3369         int errors = 0;
3370
3371         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3372         BUG_ON(IS_ERR(rec));
3373         rec->found_ref = 1;
3374
3375         /* fixme: this can not detect circular references */
3376         while (loop) {
3377                 loop = 0;
3378                 cache = search_cache_extent(root_cache, 0);
3379                 while (1) {
3380                         if (!cache)
3381                                 break;
3382                         rec = container_of(cache, struct root_record, cache);
3383                         cache = next_cache_extent(cache);
3384
3385                         if (rec->found_ref == 0)
3386                                 continue;
3387
3388                         list_for_each_entry(backref, &rec->backrefs, list) {
3389                                 if (!backref->reachable)
3390                                         continue;
3391
3392                                 ref_root = get_root_rec(root_cache,
3393                                                         backref->ref_root);
3394                                 BUG_ON(IS_ERR(ref_root));
3395                                 if (ref_root->found_ref > 0)
3396                                         continue;
3397
3398                                 backref->reachable = 0;
3399                                 rec->found_ref--;
3400                                 if (rec->found_ref == 0)
3401                                         loop = 1;
3402                         }
3403                 }
3404         }
3405
3406         cache = search_cache_extent(root_cache, 0);
3407         while (1) {
3408                 if (!cache)
3409                         break;
3410                 rec = container_of(cache, struct root_record, cache);
3411                 cache = next_cache_extent(cache);
3412
3413                 if (rec->found_ref == 0 &&
3414                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3415                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3416                         ret = check_orphan_item(root->fs_info->tree_root,
3417                                                 rec->objectid);
3418                         if (ret == 0)
3419                                 continue;
3420
3421                         /*
3422                          * If we don't have a root item then we likely just have
3423                          * a dir item in a snapshot for this root but no actual
3424                          * ref key or anything so it's meaningless.
3425                          */
3426                         if (!rec->found_root_item)
3427                                 continue;
3428                         errors++;
3429                         fprintf(stderr, "fs tree %llu not referenced\n",
3430                                 (unsigned long long)rec->objectid);
3431                 }
3432
3433                 error = 0;
3434                 if (rec->found_ref > 0 && !rec->found_root_item)
3435                         error = 1;
3436                 list_for_each_entry(backref, &rec->backrefs, list) {
3437                         if (!backref->found_dir_item)
3438                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3439                         if (!backref->found_dir_index)
3440                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3441                         if (!backref->found_back_ref)
3442                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3443                         if (!backref->found_forward_ref)
3444                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3445                         if (backref->reachable && backref->errors)
3446                                 error = 1;
3447                 }
3448                 if (!error)
3449                         continue;
3450
3451                 errors++;
3452                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3453                         (unsigned long long)rec->objectid, rec->found_ref,
3454                          rec->found_root_item ? "" : "not found");
3455
3456                 list_for_each_entry(backref, &rec->backrefs, list) {
3457                         if (!backref->reachable)
3458                                 continue;
3459                         if (!backref->errors && rec->found_root_item)
3460                                 continue;
3461                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3462                                 " index %llu namelen %u name %s errors %x\n",
3463                                 (unsigned long long)backref->ref_root,
3464                                 (unsigned long long)backref->dir,
3465                                 (unsigned long long)backref->index,
3466                                 backref->namelen, backref->name,
3467                                 backref->errors);
3468                         print_ref_error(backref->errors);
3469                 }
3470         }
3471         return errors > 0 ? 1 : 0;
3472 }
3473
3474 static int process_root_ref(struct extent_buffer *eb, int slot,
3475                             struct btrfs_key *key,
3476                             struct cache_tree *root_cache)
3477 {
3478         u64 dirid;
3479         u64 index;
3480         u32 len;
3481         u32 name_len;
3482         struct btrfs_root_ref *ref;
3483         char namebuf[BTRFS_NAME_LEN];
3484         int error;
3485
3486         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3487
3488         dirid = btrfs_root_ref_dirid(eb, ref);
3489         index = btrfs_root_ref_sequence(eb, ref);
3490         name_len = btrfs_root_ref_name_len(eb, ref);
3491
3492         if (name_len <= BTRFS_NAME_LEN) {
3493                 len = name_len;
3494                 error = 0;
3495         } else {
3496                 len = BTRFS_NAME_LEN;
3497                 error = REF_ERR_NAME_TOO_LONG;
3498         }
3499         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3500
3501         if (key->type == BTRFS_ROOT_REF_KEY) {
3502                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3503                                  index, namebuf, len, key->type, error);
3504         } else {
3505                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3506                                  index, namebuf, len, key->type, error);
3507         }
3508         return 0;
3509 }
3510
3511 static void free_corrupt_block(struct cache_extent *cache)
3512 {
3513         struct btrfs_corrupt_block *corrupt;
3514
3515         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3516         free(corrupt);
3517 }
3518
3519 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3520
3521 /*
3522  * Repair the btree of the given root.
3523  *
3524  * The fix is to remove the node key in corrupt_blocks cache_tree.
3525  * and rebalance the tree.
3526  * After the fix, the btree should be writeable.
3527  */
3528 static int repair_btree(struct btrfs_root *root,
3529                         struct cache_tree *corrupt_blocks)
3530 {
3531         struct btrfs_trans_handle *trans;
3532         struct btrfs_path *path;
3533         struct btrfs_corrupt_block *corrupt;
3534         struct cache_extent *cache;
3535         struct btrfs_key key;
3536         u64 offset;
3537         int level;
3538         int ret = 0;
3539
3540         if (cache_tree_empty(corrupt_blocks))
3541                 return 0;
3542
3543         path = btrfs_alloc_path();
3544         if (!path)
3545                 return -ENOMEM;
3546
3547         trans = btrfs_start_transaction(root, 1);
3548         if (IS_ERR(trans)) {
3549                 ret = PTR_ERR(trans);
3550                 fprintf(stderr, "Error starting transaction: %s\n",
3551                         strerror(-ret));
3552                 goto out_free_path;
3553         }
3554         cache = first_cache_extent(corrupt_blocks);
3555         while (cache) {
3556                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3557                                        cache);
3558                 level = corrupt->level;
3559                 path->lowest_level = level;
3560                 key.objectid = corrupt->key.objectid;
3561                 key.type = corrupt->key.type;
3562                 key.offset = corrupt->key.offset;
3563
3564                 /*
3565                  * Here we don't want to do any tree balance, since it may
3566                  * cause a balance with corrupted brother leaf/node,
3567                  * so ins_len set to 0 here.
3568                  * Balance will be done after all corrupt node/leaf is deleted.
3569                  */
3570                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3571                 if (ret < 0)
3572                         goto out;
3573                 offset = btrfs_node_blockptr(path->nodes[level],
3574                                              path->slots[level]);
3575
3576                 /* Remove the ptr */
3577                 ret = btrfs_del_ptr(trans, root, path, level,
3578                                     path->slots[level]);
3579                 if (ret < 0)
3580                         goto out;
3581                 /*
3582                  * Remove the corresponding extent
3583                  * return value is not concerned.
3584                  */
3585                 btrfs_release_path(path);
3586                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3587                                         0, root->root_key.objectid,
3588                                         level - 1, 0);
3589                 cache = next_cache_extent(cache);
3590         }
3591
3592         /* Balance the btree using btrfs_search_slot() */
3593         cache = first_cache_extent(corrupt_blocks);
3594         while (cache) {
3595                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3596                                        cache);
3597                 memcpy(&key, &corrupt->key, sizeof(key));
3598                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3599                 if (ret < 0)
3600                         goto out;
3601                 /* return will always >0 since it won't find the item */
3602                 ret = 0;
3603                 btrfs_release_path(path);
3604                 cache = next_cache_extent(cache);
3605         }
3606 out:
3607         btrfs_commit_transaction(trans, root);
3608 out_free_path:
3609         btrfs_free_path(path);
3610         return ret;
3611 }
3612
3613 static int check_fs_root(struct btrfs_root *root,
3614                          struct cache_tree *root_cache,
3615                          struct walk_control *wc)
3616 {
3617         int ret = 0;
3618         int err = 0;
3619         int wret;
3620         int level;
3621         struct btrfs_path path;
3622         struct shared_node root_node;
3623         struct root_record *rec;
3624         struct btrfs_root_item *root_item = &root->root_item;
3625         struct cache_tree corrupt_blocks;
3626         struct orphan_data_extent *orphan;
3627         struct orphan_data_extent *tmp;
3628         enum btrfs_tree_block_status status;
3629
3630         /*
3631          * Reuse the corrupt_block cache tree to record corrupted tree block
3632          *
3633          * Unlike the usage in extent tree check, here we do it in a per
3634          * fs/subvol tree base.
3635          */
3636         cache_tree_init(&corrupt_blocks);
3637         root->fs_info->corrupt_blocks = &corrupt_blocks;
3638
3639         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3640                 rec = get_root_rec(root_cache, root->root_key.objectid);
3641                 BUG_ON(IS_ERR(rec));
3642                 if (btrfs_root_refs(root_item) > 0)
3643                         rec->found_root_item = 1;
3644         }
3645
3646         btrfs_init_path(&path);
3647         memset(&root_node, 0, sizeof(root_node));
3648         cache_tree_init(&root_node.root_cache);
3649         cache_tree_init(&root_node.inode_cache);
3650
3651         /* Move the orphan extent record to corresponding inode_record */
3652         list_for_each_entry_safe(orphan, tmp,
3653                                  &root->orphan_data_extents, list) {
3654                 struct inode_record *inode;
3655
3656                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3657                                       1);
3658                 BUG_ON(IS_ERR(inode));
3659                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3660                 list_move(&orphan->list, &inode->orphan_extents);
3661         }
3662
3663         level = btrfs_header_level(root->node);
3664         memset(wc->nodes, 0, sizeof(wc->nodes));
3665         wc->nodes[level] = &root_node;
3666         wc->active_node = level;
3667         wc->root_level = level;
3668
3669         /* We may not have checked the root block, lets do that now */
3670         if (btrfs_is_leaf(root->node))
3671                 status = btrfs_check_leaf(root, NULL, root->node);
3672         else
3673                 status = btrfs_check_node(root, NULL, root->node);
3674         if (status != BTRFS_TREE_BLOCK_CLEAN)
3675                 return -EIO;
3676
3677         if (btrfs_root_refs(root_item) > 0 ||
3678             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3679                 path.nodes[level] = root->node;
3680                 extent_buffer_get(root->node);
3681                 path.slots[level] = 0;
3682         } else {
3683                 struct btrfs_key key;
3684                 struct btrfs_disk_key found_key;
3685
3686                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3687                 level = root_item->drop_level;
3688                 path.lowest_level = level;
3689                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3690                 if (wret < 0)
3691                         goto skip_walking;
3692                 btrfs_node_key(path.nodes[level], &found_key,
3693                                 path.slots[level]);
3694                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3695                                         sizeof(found_key)));
3696         }
3697
3698         while (1) {
3699                 wret = walk_down_tree(root, &path, wc, &level);
3700                 if (wret < 0)
3701                         ret = wret;
3702                 if (wret != 0)
3703                         break;
3704
3705                 wret = walk_up_tree(root, &path, wc, &level);
3706                 if (wret < 0)
3707                         ret = wret;
3708                 if (wret != 0)
3709                         break;
3710         }
3711 skip_walking:
3712         btrfs_release_path(&path);
3713
3714         if (!cache_tree_empty(&corrupt_blocks)) {
3715                 struct cache_extent *cache;
3716                 struct btrfs_corrupt_block *corrupt;
3717
3718                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3719                        root->root_key.objectid);
3720                 cache = first_cache_extent(&corrupt_blocks);
3721                 while (cache) {
3722                         corrupt = container_of(cache,
3723                                                struct btrfs_corrupt_block,
3724                                                cache);
3725                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3726                                cache->start, corrupt->level,
3727                                corrupt->key.objectid, corrupt->key.type,
3728                                corrupt->key.offset);
3729                         cache = next_cache_extent(cache);
3730                 }
3731                 if (repair) {
3732                         printf("Try to repair the btree for root %llu\n",
3733                                root->root_key.objectid);
3734                         ret = repair_btree(root, &corrupt_blocks);
3735                         if (ret < 0)
3736                                 fprintf(stderr, "Failed to repair btree: %s\n",
3737                                         strerror(-ret));
3738                         if (!ret)
3739                                 printf("Btree for root %llu is fixed\n",
3740                                        root->root_key.objectid);
3741                 }
3742         }
3743
3744         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3745         if (err < 0)
3746                 ret = err;
3747
3748         if (root_node.current) {
3749                 root_node.current->checked = 1;
3750                 maybe_free_inode_rec(&root_node.inode_cache,
3751                                 root_node.current);
3752         }
3753
3754         err = check_inode_recs(root, &root_node.inode_cache);
3755         if (!ret)
3756                 ret = err;
3757
3758         free_corrupt_blocks_tree(&corrupt_blocks);
3759         root->fs_info->corrupt_blocks = NULL;
3760         free_orphan_data_extents(&root->orphan_data_extents);
3761         return ret;
3762 }
3763
3764 static int fs_root_objectid(u64 objectid)
3765 {
3766         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3767             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3768                 return 1;
3769         return is_fstree(objectid);
3770 }
3771
3772 static int check_fs_roots(struct btrfs_root *root,
3773                           struct cache_tree *root_cache)
3774 {
3775         struct btrfs_path path;
3776         struct btrfs_key key;
3777         struct walk_control wc;
3778         struct extent_buffer *leaf, *tree_node;
3779         struct btrfs_root *tmp_root;
3780         struct btrfs_root *tree_root = root->fs_info->tree_root;
3781         int ret;
3782         int err = 0;
3783
3784         if (ctx.progress_enabled) {
3785                 ctx.tp = TASK_FS_ROOTS;
3786                 task_start(ctx.info);
3787         }
3788
3789         /*
3790          * Just in case we made any changes to the extent tree that weren't
3791          * reflected into the free space cache yet.
3792          */
3793         if (repair)
3794                 reset_cached_block_groups(root->fs_info);
3795         memset(&wc, 0, sizeof(wc));
3796         cache_tree_init(&wc.shared);
3797         btrfs_init_path(&path);
3798
3799 again:
3800         key.offset = 0;
3801         key.objectid = 0;
3802         key.type = BTRFS_ROOT_ITEM_KEY;
3803         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3804         if (ret < 0) {
3805                 err = 1;
3806                 goto out;
3807         }
3808         tree_node = tree_root->node;
3809         while (1) {
3810                 if (tree_node != tree_root->node) {
3811                         free_root_recs_tree(root_cache);
3812                         btrfs_release_path(&path);
3813                         goto again;
3814                 }
3815                 leaf = path.nodes[0];
3816                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3817                         ret = btrfs_next_leaf(tree_root, &path);
3818                         if (ret) {
3819                                 if (ret < 0)
3820                                         err = 1;
3821                                 break;
3822                         }
3823                         leaf = path.nodes[0];
3824                 }
3825                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3826                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3827                     fs_root_objectid(key.objectid)) {
3828                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3829                                 tmp_root = btrfs_read_fs_root_no_cache(
3830                                                 root->fs_info, &key);
3831                         } else {
3832                                 key.offset = (u64)-1;
3833                                 tmp_root = btrfs_read_fs_root(
3834                                                 root->fs_info, &key);
3835                         }
3836                         if (IS_ERR(tmp_root)) {
3837                                 err = 1;
3838                                 goto next;
3839                         }
3840                         ret = check_fs_root(tmp_root, root_cache, &wc);
3841                         if (ret == -EAGAIN) {
3842                                 free_root_recs_tree(root_cache);
3843                                 btrfs_release_path(&path);
3844                                 goto again;
3845                         }
3846                         if (ret)
3847                                 err = 1;
3848                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3849                                 btrfs_free_fs_root(tmp_root);
3850                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3851                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3852                         process_root_ref(leaf, path.slots[0], &key,
3853                                          root_cache);
3854                 }
3855 next:
3856                 path.slots[0]++;
3857         }
3858 out:
3859         btrfs_release_path(&path);
3860         if (err)
3861                 free_extent_cache_tree(&wc.shared);
3862         if (!cache_tree_empty(&wc.shared))
3863                 fprintf(stderr, "warning line %d\n", __LINE__);
3864
3865         task_stop(ctx.info);
3866
3867         return err;
3868 }
3869
3870 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3871 {
3872         struct list_head *cur = rec->backrefs.next;
3873         struct extent_backref *back;
3874         struct tree_backref *tback;
3875         struct data_backref *dback;
3876         u64 found = 0;
3877         int err = 0;
3878
3879         while(cur != &rec->backrefs) {
3880                 back = to_extent_backref(cur);
3881                 cur = cur->next;
3882                 if (!back->found_extent_tree) {
3883                         err = 1;
3884                         if (!print_errs)
3885                                 goto out;
3886                         if (back->is_data) {
3887                                 dback = to_data_backref(back);
3888                                 fprintf(stderr, "Backref %llu %s %llu"
3889                                         " owner %llu offset %llu num_refs %lu"
3890                                         " not found in extent tree\n",
3891                                         (unsigned long long)rec->start,
3892                                         back->full_backref ?
3893                                         "parent" : "root",
3894                                         back->full_backref ?
3895                                         (unsigned long long)dback->parent:
3896                                         (unsigned long long)dback->root,
3897                                         (unsigned long long)dback->owner,
3898                                         (unsigned long long)dback->offset,
3899                                         (unsigned long)dback->num_refs);
3900                         } else {
3901                                 tback = to_tree_backref(back);
3902                                 fprintf(stderr, "Backref %llu parent %llu"
3903                                         " root %llu not found in extent tree\n",
3904                                         (unsigned long long)rec->start,
3905                                         (unsigned long long)tback->parent,
3906                                         (unsigned long long)tback->root);
3907                         }
3908                 }
3909                 if (!back->is_data && !back->found_ref) {
3910                         err = 1;
3911                         if (!print_errs)
3912                                 goto out;
3913                         tback = to_tree_backref(back);
3914                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3915                                 (unsigned long long)rec->start,
3916                                 back->full_backref ? "parent" : "root",
3917                                 back->full_backref ?
3918                                 (unsigned long long)tback->parent :
3919                                 (unsigned long long)tback->root, back);
3920                 }
3921                 if (back->is_data) {
3922                         dback = to_data_backref(back);
3923                         if (dback->found_ref != dback->num_refs) {
3924                                 err = 1;
3925                                 if (!print_errs)
3926                                         goto out;
3927                                 fprintf(stderr, "Incorrect local backref count"
3928                                         " on %llu %s %llu owner %llu"
3929                                         " offset %llu found %u wanted %u back %p\n",
3930                                         (unsigned long long)rec->start,
3931                                         back->full_backref ?
3932                                         "parent" : "root",
3933                                         back->full_backref ?
3934                                         (unsigned long long)dback->parent:
3935                                         (unsigned long long)dback->root,
3936                                         (unsigned long long)dback->owner,
3937                                         (unsigned long long)dback->offset,
3938                                         dback->found_ref, dback->num_refs, back);
3939                         }
3940                         if (dback->disk_bytenr != rec->start) {
3941                                 err = 1;
3942                                 if (!print_errs)
3943                                         goto out;
3944                                 fprintf(stderr, "Backref disk bytenr does not"
3945                                         " match extent record, bytenr=%llu, "
3946                                         "ref bytenr=%llu\n",
3947                                         (unsigned long long)rec->start,
3948                                         (unsigned long long)dback->disk_bytenr);
3949                         }
3950
3951                         if (dback->bytes != rec->nr) {
3952                                 err = 1;
3953                                 if (!print_errs)
3954                                         goto out;
3955                                 fprintf(stderr, "Backref bytes do not match "
3956                                         "extent backref, bytenr=%llu, ref "
3957                                         "bytes=%llu, backref bytes=%llu\n",
3958                                         (unsigned long long)rec->start,
3959                                         (unsigned long long)rec->nr,
3960                                         (unsigned long long)dback->bytes);
3961                         }
3962                 }
3963                 if (!back->is_data) {
3964                         found += 1;
3965                 } else {
3966                         dback = to_data_backref(back);
3967                         found += dback->found_ref;
3968                 }
3969         }
3970         if (found != rec->refs) {
3971                 err = 1;
3972                 if (!print_errs)
3973                         goto out;
3974                 fprintf(stderr, "Incorrect global backref count "
3975                         "on %llu found %llu wanted %llu\n",
3976                         (unsigned long long)rec->start,
3977                         (unsigned long long)found,
3978                         (unsigned long long)rec->refs);
3979         }
3980 out:
3981         return err;
3982 }
3983
3984 static int free_all_extent_backrefs(struct extent_record *rec)
3985 {
3986         struct extent_backref *back;
3987         struct list_head *cur;
3988         while (!list_empty(&rec->backrefs)) {
3989                 cur = rec->backrefs.next;
3990                 back = to_extent_backref(cur);
3991                 list_del(cur);
3992                 free(back);
3993         }
3994         return 0;
3995 }
3996
3997 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3998                                      struct cache_tree *extent_cache)
3999 {
4000         struct cache_extent *cache;
4001         struct extent_record *rec;
4002
4003         while (1) {
4004                 cache = first_cache_extent(extent_cache);
4005                 if (!cache)
4006                         break;
4007                 rec = container_of(cache, struct extent_record, cache);
4008                 remove_cache_extent(extent_cache, cache);
4009                 free_all_extent_backrefs(rec);
4010                 free(rec);
4011         }
4012 }
4013
4014 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4015                                  struct extent_record *rec)
4016 {
4017         if (rec->content_checked && rec->owner_ref_checked &&
4018             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4019             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4020             !rec->bad_full_backref && !rec->crossing_stripes &&
4021             !rec->wrong_chunk_type) {
4022                 remove_cache_extent(extent_cache, &rec->cache);
4023                 free_all_extent_backrefs(rec);
4024                 list_del_init(&rec->list);
4025                 free(rec);
4026         }
4027         return 0;
4028 }
4029
4030 static int check_owner_ref(struct btrfs_root *root,
4031                             struct extent_record *rec,
4032                             struct extent_buffer *buf)
4033 {
4034         struct extent_backref *node;
4035         struct tree_backref *back;
4036         struct btrfs_root *ref_root;
4037         struct btrfs_key key;
4038         struct btrfs_path path;
4039         struct extent_buffer *parent;
4040         int level;
4041         int found = 0;
4042         int ret;
4043
4044         list_for_each_entry(node, &rec->backrefs, list) {
4045                 if (node->is_data)
4046                         continue;
4047                 if (!node->found_ref)
4048                         continue;
4049                 if (node->full_backref)
4050                         continue;
4051                 back = to_tree_backref(node);
4052                 if (btrfs_header_owner(buf) == back->root)
4053                         return 0;
4054         }
4055         BUG_ON(rec->is_root);
4056
4057         /* try to find the block by search corresponding fs tree */
4058         key.objectid = btrfs_header_owner(buf);
4059         key.type = BTRFS_ROOT_ITEM_KEY;
4060         key.offset = (u64)-1;
4061
4062         ref_root = btrfs_read_fs_root(root->fs_info, &key);
4063         if (IS_ERR(ref_root))
4064                 return 1;
4065
4066         level = btrfs_header_level(buf);
4067         if (level == 0)
4068                 btrfs_item_key_to_cpu(buf, &key, 0);
4069         else
4070                 btrfs_node_key_to_cpu(buf, &key, 0);
4071
4072         btrfs_init_path(&path);
4073         path.lowest_level = level + 1;
4074         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4075         if (ret < 0)
4076                 return 0;
4077
4078         parent = path.nodes[level + 1];
4079         if (parent && buf->start == btrfs_node_blockptr(parent,
4080                                                         path.slots[level + 1]))
4081                 found = 1;
4082
4083         btrfs_release_path(&path);
4084         return found ? 0 : 1;
4085 }
4086
4087 static int is_extent_tree_record(struct extent_record *rec)
4088 {
4089         struct list_head *cur = rec->backrefs.next;
4090         struct extent_backref *node;
4091         struct tree_backref *back;
4092         int is_extent = 0;
4093
4094         while(cur != &rec->backrefs) {
4095                 node = to_extent_backref(cur);
4096                 cur = cur->next;
4097                 if (node->is_data)
4098                         return 0;
4099                 back = to_tree_backref(node);
4100                 if (node->full_backref)
4101                         return 0;
4102                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4103                         is_extent = 1;
4104         }
4105         return is_extent;
4106 }
4107
4108
4109 static int record_bad_block_io(struct btrfs_fs_info *info,
4110                                struct cache_tree *extent_cache,
4111                                u64 start, u64 len)
4112 {
4113         struct extent_record *rec;
4114         struct cache_extent *cache;
4115         struct btrfs_key key;
4116
4117         cache = lookup_cache_extent(extent_cache, start, len);
4118         if (!cache)
4119                 return 0;
4120
4121         rec = container_of(cache, struct extent_record, cache);
4122         if (!is_extent_tree_record(rec))
4123                 return 0;
4124
4125         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4126         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4127 }
4128
4129 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4130                        struct extent_buffer *buf, int slot)
4131 {
4132         if (btrfs_header_level(buf)) {
4133                 struct btrfs_key_ptr ptr1, ptr2;
4134
4135                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4136                                    sizeof(struct btrfs_key_ptr));
4137                 read_extent_buffer(buf, &ptr2,
4138                                    btrfs_node_key_ptr_offset(slot + 1),
4139                                    sizeof(struct btrfs_key_ptr));
4140                 write_extent_buffer(buf, &ptr1,
4141                                     btrfs_node_key_ptr_offset(slot + 1),
4142                                     sizeof(struct btrfs_key_ptr));
4143                 write_extent_buffer(buf, &ptr2,
4144                                     btrfs_node_key_ptr_offset(slot),
4145                                     sizeof(struct btrfs_key_ptr));
4146                 if (slot == 0) {
4147                         struct btrfs_disk_key key;
4148                         btrfs_node_key(buf, &key, 0);
4149                         btrfs_fixup_low_keys(root, path, &key,
4150                                              btrfs_header_level(buf) + 1);
4151                 }
4152         } else {
4153                 struct btrfs_item *item1, *item2;
4154                 struct btrfs_key k1, k2;
4155                 char *item1_data, *item2_data;
4156                 u32 item1_offset, item2_offset, item1_size, item2_size;
4157
4158                 item1 = btrfs_item_nr(slot);
4159                 item2 = btrfs_item_nr(slot + 1);
4160                 btrfs_item_key_to_cpu(buf, &k1, slot);
4161                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4162                 item1_offset = btrfs_item_offset(buf, item1);
4163                 item2_offset = btrfs_item_offset(buf, item2);
4164                 item1_size = btrfs_item_size(buf, item1);
4165                 item2_size = btrfs_item_size(buf, item2);
4166
4167                 item1_data = malloc(item1_size);
4168                 if (!item1_data)
4169                         return -ENOMEM;
4170                 item2_data = malloc(item2_size);
4171                 if (!item2_data) {
4172                         free(item1_data);
4173                         return -ENOMEM;
4174                 }
4175
4176                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4177                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4178
4179                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4180                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4181                 free(item1_data);
4182                 free(item2_data);
4183
4184                 btrfs_set_item_offset(buf, item1, item2_offset);
4185                 btrfs_set_item_offset(buf, item2, item1_offset);
4186                 btrfs_set_item_size(buf, item1, item2_size);
4187                 btrfs_set_item_size(buf, item2, item1_size);
4188
4189                 path->slots[0] = slot;
4190                 btrfs_set_item_key_unsafe(root, path, &k2);
4191                 path->slots[0] = slot + 1;
4192                 btrfs_set_item_key_unsafe(root, path, &k1);
4193         }
4194         return 0;
4195 }
4196
4197 static int fix_key_order(struct btrfs_trans_handle *trans,
4198                          struct btrfs_root *root,
4199                          struct btrfs_path *path)
4200 {
4201         struct extent_buffer *buf;
4202         struct btrfs_key k1, k2;
4203         int i;
4204         int level = path->lowest_level;
4205         int ret = -EIO;
4206
4207         buf = path->nodes[level];
4208         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4209                 if (level) {
4210                         btrfs_node_key_to_cpu(buf, &k1, i);
4211                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4212                 } else {
4213                         btrfs_item_key_to_cpu(buf, &k1, i);
4214                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4215                 }
4216                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4217                         continue;
4218                 ret = swap_values(root, path, buf, i);
4219                 if (ret)
4220                         break;
4221                 btrfs_mark_buffer_dirty(buf);
4222                 i = 0;
4223         }
4224         return ret;
4225 }
4226
4227 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4228                              struct btrfs_root *root,
4229                              struct btrfs_path *path,
4230                              struct extent_buffer *buf, int slot)
4231 {
4232         struct btrfs_key key;
4233         int nritems = btrfs_header_nritems(buf);
4234
4235         btrfs_item_key_to_cpu(buf, &key, slot);
4236
4237         /* These are all the keys we can deal with missing. */
4238         if (key.type != BTRFS_DIR_INDEX_KEY &&
4239             key.type != BTRFS_EXTENT_ITEM_KEY &&
4240             key.type != BTRFS_METADATA_ITEM_KEY &&
4241             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4242             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4243                 return -1;
4244
4245         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4246                (unsigned long long)key.objectid, key.type,
4247                (unsigned long long)key.offset, slot, buf->start);
4248         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4249                               btrfs_item_nr_offset(slot + 1),
4250                               sizeof(struct btrfs_item) *
4251                               (nritems - slot - 1));
4252         btrfs_set_header_nritems(buf, nritems - 1);
4253         if (slot == 0) {
4254                 struct btrfs_disk_key disk_key;
4255
4256                 btrfs_item_key(buf, &disk_key, 0);
4257                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4258         }
4259         btrfs_mark_buffer_dirty(buf);
4260         return 0;
4261 }
4262
4263 static int fix_item_offset(struct btrfs_trans_handle *trans,
4264                            struct btrfs_root *root,
4265                            struct btrfs_path *path)
4266 {
4267         struct extent_buffer *buf;
4268         int i;
4269         int ret = 0;
4270
4271         /* We should only get this for leaves */
4272         BUG_ON(path->lowest_level);
4273         buf = path->nodes[0];
4274 again:
4275         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4276                 unsigned int shift = 0, offset;
4277
4278                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4279                     BTRFS_LEAF_DATA_SIZE(root)) {
4280                         if (btrfs_item_end_nr(buf, i) >
4281                             BTRFS_LEAF_DATA_SIZE(root)) {
4282                                 ret = delete_bogus_item(trans, root, path,
4283                                                         buf, i);
4284                                 if (!ret)
4285                                         goto again;
4286                                 fprintf(stderr, "item is off the end of the "
4287                                         "leaf, can't fix\n");
4288                                 ret = -EIO;
4289                                 break;
4290                         }
4291                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4292                                 btrfs_item_end_nr(buf, i);
4293                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4294                            btrfs_item_offset_nr(buf, i - 1)) {
4295                         if (btrfs_item_end_nr(buf, i) >
4296                             btrfs_item_offset_nr(buf, i - 1)) {
4297                                 ret = delete_bogus_item(trans, root, path,
4298                                                         buf, i);
4299                                 if (!ret)
4300                                         goto again;
4301                                 fprintf(stderr, "items overlap, can't fix\n");
4302                                 ret = -EIO;
4303                                 break;
4304                         }
4305                         shift = btrfs_item_offset_nr(buf, i - 1) -
4306                                 btrfs_item_end_nr(buf, i);
4307                 }
4308                 if (!shift)
4309                         continue;
4310
4311                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4312                        i, shift, (unsigned long long)buf->start);
4313                 offset = btrfs_item_offset_nr(buf, i);
4314                 memmove_extent_buffer(buf,
4315                                       btrfs_leaf_data(buf) + offset + shift,
4316                                       btrfs_leaf_data(buf) + offset,
4317                                       btrfs_item_size_nr(buf, i));
4318                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4319                                       offset + shift);
4320                 btrfs_mark_buffer_dirty(buf);
4321         }
4322
4323         /*
4324          * We may have moved things, in which case we want to exit so we don't
4325          * write those changes out.  Once we have proper abort functionality in
4326          * progs this can be changed to something nicer.
4327          */
4328         BUG_ON(ret);
4329         return ret;
4330 }
4331
4332 /*
4333  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4334  * then just return -EIO.
4335  */
4336 static int try_to_fix_bad_block(struct btrfs_root *root,
4337                                 struct extent_buffer *buf,
4338                                 enum btrfs_tree_block_status status)
4339 {
4340         struct btrfs_trans_handle *trans;
4341         struct ulist *roots;
4342         struct ulist_node *node;
4343         struct btrfs_root *search_root;
4344         struct btrfs_path *path;
4345         struct ulist_iterator iter;
4346         struct btrfs_key root_key, key;
4347         int ret;
4348
4349         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4350             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4351                 return -EIO;
4352
4353         path = btrfs_alloc_path();
4354         if (!path)
4355                 return -EIO;
4356
4357         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4358                                    0, &roots);
4359         if (ret) {
4360                 btrfs_free_path(path);
4361                 return -EIO;
4362         }
4363
4364         ULIST_ITER_INIT(&iter);
4365         while ((node = ulist_next(roots, &iter))) {
4366                 root_key.objectid = node->val;
4367                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4368                 root_key.offset = (u64)-1;
4369
4370                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4371                 if (IS_ERR(root)) {
4372                         ret = -EIO;
4373                         break;
4374                 }
4375
4376
4377                 trans = btrfs_start_transaction(search_root, 0);
4378                 if (IS_ERR(trans)) {
4379                         ret = PTR_ERR(trans);
4380                         break;
4381                 }
4382
4383                 path->lowest_level = btrfs_header_level(buf);
4384                 path->skip_check_block = 1;
4385                 if (path->lowest_level)
4386                         btrfs_node_key_to_cpu(buf, &key, 0);
4387                 else
4388                         btrfs_item_key_to_cpu(buf, &key, 0);
4389                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4390                 if (ret) {
4391                         ret = -EIO;
4392                         btrfs_commit_transaction(trans, search_root);
4393                         break;
4394                 }
4395                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4396                         ret = fix_key_order(trans, search_root, path);
4397                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4398                         ret = fix_item_offset(trans, search_root, path);
4399                 if (ret) {
4400                         btrfs_commit_transaction(trans, search_root);
4401                         break;
4402                 }
4403                 btrfs_release_path(path);
4404                 btrfs_commit_transaction(trans, search_root);
4405         }
4406         ulist_free(roots);
4407         btrfs_free_path(path);
4408         return ret;
4409 }
4410
4411 static int check_block(struct btrfs_root *root,
4412                        struct cache_tree *extent_cache,
4413                        struct extent_buffer *buf, u64 flags)
4414 {
4415         struct extent_record *rec;
4416         struct cache_extent *cache;
4417         struct btrfs_key key;
4418         enum btrfs_tree_block_status status;
4419         int ret = 0;
4420         int level;
4421
4422         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4423         if (!cache)
4424                 return 1;
4425         rec = container_of(cache, struct extent_record, cache);
4426         rec->generation = btrfs_header_generation(buf);
4427
4428         level = btrfs_header_level(buf);
4429         if (btrfs_header_nritems(buf) > 0) {
4430
4431                 if (level == 0)
4432                         btrfs_item_key_to_cpu(buf, &key, 0);
4433                 else
4434                         btrfs_node_key_to_cpu(buf, &key, 0);
4435
4436                 rec->info_objectid = key.objectid;
4437         }
4438         rec->info_level = level;
4439
4440         if (btrfs_is_leaf(buf))
4441                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4442         else
4443                 status = btrfs_check_node(root, &rec->parent_key, buf);
4444
4445         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4446                 if (repair)
4447                         status = try_to_fix_bad_block(root, buf, status);
4448                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4449                         ret = -EIO;
4450                         fprintf(stderr, "bad block %llu\n",
4451                                 (unsigned long long)buf->start);
4452                 } else {
4453                         /*
4454                          * Signal to callers we need to start the scan over
4455                          * again since we'll have cowed blocks.
4456                          */
4457                         ret = -EAGAIN;
4458                 }
4459         } else {
4460                 rec->content_checked = 1;
4461                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4462                         rec->owner_ref_checked = 1;
4463                 else {
4464                         ret = check_owner_ref(root, rec, buf);
4465                         if (!ret)
4466                                 rec->owner_ref_checked = 1;
4467                 }
4468         }
4469         if (!ret)
4470                 maybe_free_extent_rec(extent_cache, rec);
4471         return ret;
4472 }
4473
4474
4475 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4476                                                 u64 parent, u64 root)
4477 {
4478         struct rb_node *node;
4479         struct tree_backref *back = NULL;
4480         struct tree_backref match = {
4481                 .node = {
4482                         .is_data = 0,
4483                 },
4484         };
4485
4486         if (parent) {
4487                 match.parent = parent;
4488                 match.node.full_backref = 1;
4489         } else {
4490                 match.root = root;
4491         }
4492
4493         node = rb_search(&rec->backref_tree, &match.node.node,
4494                          (rb_compare_keys)compare_extent_backref, NULL);
4495         if (node)
4496                 back = to_tree_backref(rb_node_to_extent_backref(node));
4497
4498         return back;
4499 }
4500
4501 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4502                                                 u64 parent, u64 root)
4503 {
4504         struct tree_backref *ref = malloc(sizeof(*ref));
4505
4506         if (!ref)
4507                 return NULL;
4508         memset(&ref->node, 0, sizeof(ref->node));
4509         if (parent > 0) {
4510                 ref->parent = parent;
4511                 ref->node.full_backref = 1;
4512         } else {
4513                 ref->root = root;
4514                 ref->node.full_backref = 0;
4515         }
4516         list_add_tail(&ref->node.list, &rec->backrefs);
4517         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4518
4519         return ref;
4520 }
4521
4522 static struct data_backref *find_data_backref(struct extent_record *rec,
4523                                                 u64 parent, u64 root,
4524                                                 u64 owner, u64 offset,
4525                                                 int found_ref,
4526                                                 u64 disk_bytenr, u64 bytes)
4527 {
4528         struct rb_node *node;
4529         struct data_backref *back = NULL;
4530         struct data_backref match = {
4531                 .node = {
4532                         .is_data = 1,
4533                 },
4534                 .owner = owner,
4535                 .offset = offset,
4536                 .bytes = bytes,
4537                 .found_ref = found_ref,
4538                 .disk_bytenr = disk_bytenr,
4539         };
4540
4541         if (parent) {
4542                 match.parent = parent;
4543                 match.node.full_backref = 1;
4544         } else {
4545                 match.root = root;
4546         }
4547
4548         node = rb_search(&rec->backref_tree, &match.node.node,
4549                          (rb_compare_keys)compare_extent_backref, NULL);
4550         if (node)
4551                 back = to_data_backref(rb_node_to_extent_backref(node));
4552
4553         return back;
4554 }
4555
4556 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4557                                                 u64 parent, u64 root,
4558                                                 u64 owner, u64 offset,
4559                                                 u64 max_size)
4560 {
4561         struct data_backref *ref = malloc(sizeof(*ref));
4562
4563         if (!ref)
4564                 return NULL;
4565         memset(&ref->node, 0, sizeof(ref->node));
4566         ref->node.is_data = 1;
4567
4568         if (parent > 0) {
4569                 ref->parent = parent;
4570                 ref->owner = 0;
4571                 ref->offset = 0;
4572                 ref->node.full_backref = 1;
4573         } else {
4574                 ref->root = root;
4575                 ref->owner = owner;
4576                 ref->offset = offset;
4577                 ref->node.full_backref = 0;
4578         }
4579         ref->bytes = max_size;
4580         ref->found_ref = 0;
4581         ref->num_refs = 0;
4582         list_add_tail(&ref->node.list, &rec->backrefs);
4583         rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4584         if (max_size > rec->max_size)
4585                 rec->max_size = max_size;
4586         return ref;
4587 }
4588
4589 /* Check if the type of extent matches with its chunk */
4590 static void check_extent_type(struct extent_record *rec)
4591 {
4592         struct btrfs_block_group_cache *bg_cache;
4593
4594         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4595         if (!bg_cache)
4596                 return;
4597
4598         /* data extent, check chunk directly*/
4599         if (!rec->metadata) {
4600                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4601                         rec->wrong_chunk_type = 1;
4602                 return;
4603         }
4604
4605         /* metadata extent, check the obvious case first */
4606         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4607                                  BTRFS_BLOCK_GROUP_METADATA))) {
4608                 rec->wrong_chunk_type = 1;
4609                 return;
4610         }
4611
4612         /*
4613          * Check SYSTEM extent, as it's also marked as metadata, we can only
4614          * make sure it's a SYSTEM extent by its backref
4615          */
4616         if (!list_empty(&rec->backrefs)) {
4617                 struct extent_backref *node;
4618                 struct tree_backref *tback;
4619                 u64 bg_type;
4620
4621                 node = to_extent_backref(rec->backrefs.next);
4622                 if (node->is_data) {
4623                         /* tree block shouldn't have data backref */
4624                         rec->wrong_chunk_type = 1;
4625                         return;
4626                 }
4627                 tback = container_of(node, struct tree_backref, node);
4628
4629                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4630                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4631                 else
4632                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4633                 if (!(bg_cache->flags & bg_type))
4634                         rec->wrong_chunk_type = 1;
4635         }
4636 }
4637
4638 /*
4639  * Allocate a new extent record, fill default values from @tmpl and insert int
4640  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4641  * the cache, otherwise it fails.
4642  */
4643 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4644                 struct extent_record *tmpl)
4645 {
4646         struct extent_record *rec;
4647         int ret = 0;
4648
4649         rec = malloc(sizeof(*rec));
4650         if (!rec)
4651                 return -ENOMEM;
4652         rec->start = tmpl->start;
4653         rec->max_size = tmpl->max_size;
4654         rec->nr = max(tmpl->nr, tmpl->max_size);
4655         rec->found_rec = tmpl->found_rec;
4656         rec->content_checked = tmpl->content_checked;
4657         rec->owner_ref_checked = tmpl->owner_ref_checked;
4658         rec->num_duplicates = 0;
4659         rec->metadata = tmpl->metadata;
4660         rec->flag_block_full_backref = FLAG_UNSET;
4661         rec->bad_full_backref = 0;
4662         rec->crossing_stripes = 0;
4663         rec->wrong_chunk_type = 0;
4664         rec->is_root = tmpl->is_root;
4665         rec->refs = tmpl->refs;
4666         rec->extent_item_refs = tmpl->extent_item_refs;
4667         rec->parent_generation = tmpl->parent_generation;
4668         INIT_LIST_HEAD(&rec->backrefs);
4669         INIT_LIST_HEAD(&rec->dups);
4670         INIT_LIST_HEAD(&rec->list);
4671         rec->backref_tree = RB_ROOT;
4672         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4673         rec->cache.start = tmpl->start;
4674         rec->cache.size = tmpl->nr;
4675         ret = insert_cache_extent(extent_cache, &rec->cache);
4676         BUG_ON(ret);
4677         bytes_used += rec->nr;
4678
4679         if (tmpl->metadata)
4680                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4681                                 global_info->tree_root->nodesize);
4682         check_extent_type(rec);
4683         return ret;
4684 }
4685
4686 /*
4687  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4688  * some are hints:
4689  * - refs              - if found, increase refs
4690  * - is_root           - if found, set
4691  * - content_checked   - if found, set
4692  * - owner_ref_checked - if found, set
4693  *
4694  * If not found, create a new one, initialize and insert.
4695  */
4696 static int add_extent_rec(struct cache_tree *extent_cache,
4697                 struct extent_record *tmpl)
4698 {
4699         struct extent_record *rec;
4700         struct cache_extent *cache;
4701         int ret = 0;
4702         int dup = 0;
4703
4704         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4705         if (cache) {
4706                 rec = container_of(cache, struct extent_record, cache);
4707                 if (tmpl->refs)
4708                         rec->refs++;
4709                 if (rec->nr == 1)
4710                         rec->nr = max(tmpl->nr, tmpl->max_size);
4711
4712                 /*
4713                  * We need to make sure to reset nr to whatever the extent
4714                  * record says was the real size, this way we can compare it to
4715                  * the backrefs.
4716                  */
4717                 if (tmpl->found_rec) {
4718                         if (tmpl->start != rec->start || rec->found_rec) {
4719                                 struct extent_record *tmp;
4720
4721                                 dup = 1;
4722                                 if (list_empty(&rec->list))
4723                                         list_add_tail(&rec->list,
4724                                                       &duplicate_extents);
4725
4726                                 /*
4727                                  * We have to do this song and dance in case we
4728                                  * find an extent record that falls inside of
4729                                  * our current extent record but does not have
4730                                  * the same objectid.
4731                                  */
4732                                 tmp = malloc(sizeof(*tmp));
4733                                 if (!tmp)
4734                                         return -ENOMEM;
4735                                 tmp->start = tmpl->start;
4736                                 tmp->max_size = tmpl->max_size;
4737                                 tmp->nr = tmpl->nr;
4738                                 tmp->found_rec = 1;
4739                                 tmp->metadata = tmpl->metadata;
4740                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4741                                 INIT_LIST_HEAD(&tmp->list);
4742                                 list_add_tail(&tmp->list, &rec->dups);
4743                                 rec->num_duplicates++;
4744                         } else {
4745                                 rec->nr = tmpl->nr;
4746                                 rec->found_rec = 1;
4747                         }
4748                 }
4749
4750                 if (tmpl->extent_item_refs && !dup) {
4751                         if (rec->extent_item_refs) {
4752                                 fprintf(stderr, "block %llu rec "
4753                                         "extent_item_refs %llu, passed %llu\n",
4754                                         (unsigned long long)tmpl->start,
4755                                         (unsigned long long)
4756                                                         rec->extent_item_refs,
4757                                         (unsigned long long)tmpl->extent_item_refs);
4758                         }
4759                         rec->extent_item_refs = tmpl->extent_item_refs;
4760                 }
4761                 if (tmpl->is_root)
4762                         rec->is_root = 1;
4763                 if (tmpl->content_checked)
4764                         rec->content_checked = 1;
4765                 if (tmpl->owner_ref_checked)
4766                         rec->owner_ref_checked = 1;
4767                 memcpy(&rec->parent_key, &tmpl->parent_key,
4768                                 sizeof(tmpl->parent_key));
4769                 if (tmpl->parent_generation)
4770                         rec->parent_generation = tmpl->parent_generation;
4771                 if (rec->max_size < tmpl->max_size)
4772                         rec->max_size = tmpl->max_size;
4773
4774                 /*
4775                  * A metadata extent can't cross stripe_len boundary, otherwise
4776                  * kernel scrub won't be able to handle it.
4777                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4778                  * it.
4779                  */
4780                 if (tmpl->metadata)
4781                         rec->crossing_stripes = check_crossing_stripes(
4782                                 rec->start, global_info->tree_root->nodesize);
4783                 check_extent_type(rec);
4784                 maybe_free_extent_rec(extent_cache, rec);
4785                 return ret;
4786         }
4787
4788         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4789
4790         return ret;
4791 }
4792
4793 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4794                             u64 parent, u64 root, int found_ref)
4795 {
4796         struct extent_record *rec;
4797         struct tree_backref *back;
4798         struct cache_extent *cache;
4799
4800         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4801         if (!cache) {
4802                 struct extent_record tmpl;
4803
4804                 memset(&tmpl, 0, sizeof(tmpl));
4805                 tmpl.start = bytenr;
4806                 tmpl.nr = 1;
4807                 tmpl.metadata = 1;
4808
4809                 add_extent_rec_nolookup(extent_cache, &tmpl);
4810
4811                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4812                 if (!cache)
4813                         abort();
4814         }
4815
4816         rec = container_of(cache, struct extent_record, cache);
4817         if (rec->start != bytenr) {
4818                 abort();
4819         }
4820
4821         back = find_tree_backref(rec, parent, root);
4822         if (!back) {
4823                 back = alloc_tree_backref(rec, parent, root);
4824                 BUG_ON(!back);
4825         }
4826
4827         if (found_ref) {
4828                 if (back->node.found_ref) {
4829                         fprintf(stderr, "Extent back ref already exists "
4830                                 "for %llu parent %llu root %llu \n",
4831                                 (unsigned long long)bytenr,
4832                                 (unsigned long long)parent,
4833                                 (unsigned long long)root);
4834                 }
4835                 back->node.found_ref = 1;
4836         } else {
4837                 if (back->node.found_extent_tree) {
4838                         fprintf(stderr, "Extent back ref already exists "
4839                                 "for %llu parent %llu root %llu \n",
4840                                 (unsigned long long)bytenr,
4841                                 (unsigned long long)parent,
4842                                 (unsigned long long)root);
4843                 }
4844                 back->node.found_extent_tree = 1;
4845         }
4846         check_extent_type(rec);
4847         maybe_free_extent_rec(extent_cache, rec);
4848         return 0;
4849 }
4850
4851 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4852                             u64 parent, u64 root, u64 owner, u64 offset,
4853                             u32 num_refs, int found_ref, u64 max_size)
4854 {
4855         struct extent_record *rec;
4856         struct data_backref *back;
4857         struct cache_extent *cache;
4858
4859         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4860         if (!cache) {
4861                 struct extent_record tmpl;
4862
4863                 memset(&tmpl, 0, sizeof(tmpl));
4864                 tmpl.start = bytenr;
4865                 tmpl.nr = 1;
4866                 tmpl.max_size = max_size;
4867
4868                 add_extent_rec_nolookup(extent_cache, &tmpl);
4869
4870                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4871                 if (!cache)
4872                         abort();
4873         }
4874
4875         rec = container_of(cache, struct extent_record, cache);
4876         if (rec->max_size < max_size)
4877                 rec->max_size = max_size;
4878
4879         /*
4880          * If found_ref is set then max_size is the real size and must match the
4881          * existing refs.  So if we have already found a ref then we need to
4882          * make sure that this ref matches the existing one, otherwise we need
4883          * to add a new backref so we can notice that the backrefs don't match
4884          * and we need to figure out who is telling the truth.  This is to
4885          * account for that awful fsync bug I introduced where we'd end up with
4886          * a btrfs_file_extent_item that would have its length include multiple
4887          * prealloc extents or point inside of a prealloc extent.
4888          */
4889         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4890                                  bytenr, max_size);
4891         if (!back) {
4892                 back = alloc_data_backref(rec, parent, root, owner, offset,
4893                                           max_size);
4894                 BUG_ON(!back);
4895         }
4896
4897         if (found_ref) {
4898                 BUG_ON(num_refs != 1);
4899                 if (back->node.found_ref)
4900                         BUG_ON(back->bytes != max_size);
4901                 back->node.found_ref = 1;
4902                 back->found_ref += 1;
4903                 back->bytes = max_size;
4904                 back->disk_bytenr = bytenr;
4905                 rec->refs += 1;
4906                 rec->content_checked = 1;
4907                 rec->owner_ref_checked = 1;
4908         } else {
4909                 if (back->node.found_extent_tree) {
4910                         fprintf(stderr, "Extent back ref already exists "
4911                                 "for %llu parent %llu root %llu "
4912                                 "owner %llu offset %llu num_refs %lu\n",
4913                                 (unsigned long long)bytenr,
4914                                 (unsigned long long)parent,
4915                                 (unsigned long long)root,
4916                                 (unsigned long long)owner,
4917                                 (unsigned long long)offset,
4918                                 (unsigned long)num_refs);
4919                 }
4920                 back->num_refs = num_refs;
4921                 back->node.found_extent_tree = 1;
4922         }
4923         maybe_free_extent_rec(extent_cache, rec);
4924         return 0;
4925 }
4926
4927 static int add_pending(struct cache_tree *pending,
4928                        struct cache_tree *seen, u64 bytenr, u32 size)
4929 {
4930         int ret;
4931         ret = add_cache_extent(seen, bytenr, size);
4932         if (ret)
4933                 return ret;
4934         add_cache_extent(pending, bytenr, size);
4935         return 0;
4936 }
4937
4938 static int pick_next_pending(struct cache_tree *pending,
4939                         struct cache_tree *reada,
4940                         struct cache_tree *nodes,
4941                         u64 last, struct block_info *bits, int bits_nr,
4942                         int *reada_bits)
4943 {
4944         unsigned long node_start = last;
4945         struct cache_extent *cache;
4946         int ret;
4947
4948         cache = search_cache_extent(reada, 0);
4949         if (cache) {
4950                 bits[0].start = cache->start;
4951                 bits[0].size = cache->size;
4952                 *reada_bits = 1;
4953                 return 1;
4954         }
4955         *reada_bits = 0;
4956         if (node_start > 32768)
4957                 node_start -= 32768;
4958
4959         cache = search_cache_extent(nodes, node_start);
4960         if (!cache)
4961                 cache = search_cache_extent(nodes, 0);
4962
4963         if (!cache) {
4964                  cache = search_cache_extent(pending, 0);
4965                  if (!cache)
4966                          return 0;
4967                  ret = 0;
4968                  do {
4969                          bits[ret].start = cache->start;
4970                          bits[ret].size = cache->size;
4971                          cache = next_cache_extent(cache);
4972                          ret++;
4973                  } while (cache && ret < bits_nr);
4974                  return ret;
4975         }
4976
4977         ret = 0;
4978         do {
4979                 bits[ret].start = cache->start;
4980                 bits[ret].size = cache->size;
4981                 cache = next_cache_extent(cache);
4982                 ret++;
4983         } while (cache && ret < bits_nr);
4984
4985         if (bits_nr - ret > 8) {
4986                 u64 lookup = bits[0].start + bits[0].size;
4987                 struct cache_extent *next;
4988                 next = search_cache_extent(pending, lookup);
4989                 while(next) {
4990                         if (next->start - lookup > 32768)
4991                                 break;
4992                         bits[ret].start = next->start;
4993                         bits[ret].size = next->size;
4994                         lookup = next->start + next->size;
4995                         ret++;
4996                         if (ret == bits_nr)
4997                                 break;
4998                         next = next_cache_extent(next);
4999                         if (!next)
5000                                 break;
5001                 }
5002         }
5003         return ret;
5004 }
5005
5006 static void free_chunk_record(struct cache_extent *cache)
5007 {
5008         struct chunk_record *rec;
5009
5010         rec = container_of(cache, struct chunk_record, cache);
5011         list_del_init(&rec->list);
5012         list_del_init(&rec->dextents);
5013         free(rec);
5014 }
5015
5016 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5017 {
5018         cache_tree_free_extents(chunk_cache, free_chunk_record);
5019 }
5020
5021 static void free_device_record(struct rb_node *node)
5022 {
5023         struct device_record *rec;
5024
5025         rec = container_of(node, struct device_record, node);
5026         free(rec);
5027 }
5028
5029 FREE_RB_BASED_TREE(device_cache, free_device_record);
5030
5031 int insert_block_group_record(struct block_group_tree *tree,
5032                               struct block_group_record *bg_rec)
5033 {
5034         int ret;
5035
5036         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5037         if (ret)
5038                 return ret;
5039
5040         list_add_tail(&bg_rec->list, &tree->block_groups);
5041         return 0;
5042 }
5043
5044 static void free_block_group_record(struct cache_extent *cache)
5045 {
5046         struct block_group_record *rec;
5047
5048         rec = container_of(cache, struct block_group_record, cache);
5049         list_del_init(&rec->list);
5050         free(rec);
5051 }
5052
5053 void free_block_group_tree(struct block_group_tree *tree)
5054 {
5055         cache_tree_free_extents(&tree->tree, free_block_group_record);
5056 }
5057
5058 int insert_device_extent_record(struct device_extent_tree *tree,
5059                                 struct device_extent_record *de_rec)
5060 {
5061         int ret;
5062
5063         /*
5064          * Device extent is a bit different from the other extents, because
5065          * the extents which belong to the different devices may have the
5066          * same start and size, so we need use the special extent cache
5067          * search/insert functions.
5068          */
5069         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5070         if (ret)
5071                 return ret;
5072
5073         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5074         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5075         return 0;
5076 }
5077
5078 static void free_device_extent_record(struct cache_extent *cache)
5079 {
5080         struct device_extent_record *rec;
5081
5082         rec = container_of(cache, struct device_extent_record, cache);
5083         if (!list_empty(&rec->chunk_list))
5084                 list_del_init(&rec->chunk_list);
5085         if (!list_empty(&rec->device_list))
5086                 list_del_init(&rec->device_list);
5087         free(rec);
5088 }
5089
5090 void free_device_extent_tree(struct device_extent_tree *tree)
5091 {
5092         cache_tree_free_extents(&tree->tree, free_device_extent_record);
5093 }
5094
5095 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5096 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5097                                  struct extent_buffer *leaf, int slot)
5098 {
5099         struct btrfs_extent_ref_v0 *ref0;
5100         struct btrfs_key key;
5101
5102         btrfs_item_key_to_cpu(leaf, &key, slot);
5103         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5104         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5105                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5106         } else {
5107                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5108                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
5109         }
5110         return 0;
5111 }
5112 #endif
5113
5114 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5115                                             struct btrfs_key *key,
5116                                             int slot)
5117 {
5118         struct btrfs_chunk *ptr;
5119         struct chunk_record *rec;
5120         int num_stripes, i;
5121
5122         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5123         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5124
5125         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5126         if (!rec) {
5127                 fprintf(stderr, "memory allocation failed\n");
5128                 exit(-1);
5129         }
5130
5131         INIT_LIST_HEAD(&rec->list);
5132         INIT_LIST_HEAD(&rec->dextents);
5133         rec->bg_rec = NULL;
5134
5135         rec->cache.start = key->offset;
5136         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5137
5138         rec->generation = btrfs_header_generation(leaf);
5139
5140         rec->objectid = key->objectid;
5141         rec->type = key->type;
5142         rec->offset = key->offset;
5143
5144         rec->length = rec->cache.size;
5145         rec->owner = btrfs_chunk_owner(leaf, ptr);
5146         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5147         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5148         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5149         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5150         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5151         rec->num_stripes = num_stripes;
5152         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5153
5154         for (i = 0; i < rec->num_stripes; ++i) {
5155                 rec->stripes[i].devid =
5156                         btrfs_stripe_devid_nr(leaf, ptr, i);
5157                 rec->stripes[i].offset =
5158                         btrfs_stripe_offset_nr(leaf, ptr, i);
5159                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5160                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5161                                 BTRFS_UUID_SIZE);
5162         }
5163
5164         return rec;
5165 }
5166
5167 static int process_chunk_item(struct cache_tree *chunk_cache,
5168                               struct btrfs_key *key, struct extent_buffer *eb,
5169                               int slot)
5170 {
5171         struct chunk_record *rec;
5172         int ret = 0;
5173
5174         rec = btrfs_new_chunk_record(eb, key, slot);
5175         ret = insert_cache_extent(chunk_cache, &rec->cache);
5176         if (ret) {
5177                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5178                         rec->offset, rec->length);
5179                 free(rec);
5180         }
5181
5182         return ret;
5183 }
5184
5185 static int process_device_item(struct rb_root *dev_cache,
5186                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5187 {
5188         struct btrfs_dev_item *ptr;
5189         struct device_record *rec;
5190         int ret = 0;
5191
5192         ptr = btrfs_item_ptr(eb,
5193                 slot, struct btrfs_dev_item);
5194
5195         rec = malloc(sizeof(*rec));
5196         if (!rec) {
5197                 fprintf(stderr, "memory allocation failed\n");
5198                 return -ENOMEM;
5199         }
5200
5201         rec->devid = key->offset;
5202         rec->generation = btrfs_header_generation(eb);
5203
5204         rec->objectid = key->objectid;
5205         rec->type = key->type;
5206         rec->offset = key->offset;
5207
5208         rec->devid = btrfs_device_id(eb, ptr);
5209         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5210         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5211
5212         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5213         if (ret) {
5214                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5215                 free(rec);
5216         }
5217
5218         return ret;
5219 }
5220
5221 struct block_group_record *
5222 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5223                              int slot)
5224 {
5225         struct btrfs_block_group_item *ptr;
5226         struct block_group_record *rec;
5227
5228         rec = calloc(1, sizeof(*rec));
5229         if (!rec) {
5230                 fprintf(stderr, "memory allocation failed\n");
5231                 exit(-1);
5232         }
5233
5234         rec->cache.start = key->objectid;
5235         rec->cache.size = key->offset;
5236
5237         rec->generation = btrfs_header_generation(leaf);
5238
5239         rec->objectid = key->objectid;
5240         rec->type = key->type;
5241         rec->offset = key->offset;
5242
5243         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5244         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5245
5246         INIT_LIST_HEAD(&rec->list);
5247
5248         return rec;
5249 }
5250
5251 static int process_block_group_item(struct block_group_tree *block_group_cache,
5252                                     struct btrfs_key *key,
5253                                     struct extent_buffer *eb, int slot)
5254 {
5255         struct block_group_record *rec;
5256         int ret = 0;
5257
5258         rec = btrfs_new_block_group_record(eb, key, slot);
5259         ret = insert_block_group_record(block_group_cache, rec);
5260         if (ret) {
5261                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5262                         rec->objectid, rec->offset);
5263                 free(rec);
5264         }
5265
5266         return ret;
5267 }
5268
5269 struct device_extent_record *
5270 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5271                                struct btrfs_key *key, int slot)
5272 {
5273         struct device_extent_record *rec;
5274         struct btrfs_dev_extent *ptr;
5275
5276         rec = calloc(1, sizeof(*rec));
5277         if (!rec) {
5278                 fprintf(stderr, "memory allocation failed\n");
5279                 exit(-1);
5280         }
5281
5282         rec->cache.objectid = key->objectid;
5283         rec->cache.start = key->offset;
5284
5285         rec->generation = btrfs_header_generation(leaf);
5286
5287         rec->objectid = key->objectid;
5288         rec->type = key->type;
5289         rec->offset = key->offset;
5290
5291         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5292         rec->chunk_objecteid =
5293                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5294         rec->chunk_offset =
5295                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5296         rec->length = btrfs_dev_extent_length(leaf, ptr);
5297         rec->cache.size = rec->length;
5298
5299         INIT_LIST_HEAD(&rec->chunk_list);
5300         INIT_LIST_HEAD(&rec->device_list);
5301
5302         return rec;
5303 }
5304
5305 static int
5306 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5307                            struct btrfs_key *key, struct extent_buffer *eb,
5308                            int slot)
5309 {
5310         struct device_extent_record *rec;
5311         int ret;
5312
5313         rec = btrfs_new_device_extent_record(eb, key, slot);
5314         ret = insert_device_extent_record(dev_extent_cache, rec);
5315         if (ret) {
5316                 fprintf(stderr,
5317                         "Device extent[%llu, %llu, %llu] existed.\n",
5318                         rec->objectid, rec->offset, rec->length);
5319                 free(rec);
5320         }
5321
5322         return ret;
5323 }
5324
5325 static int process_extent_item(struct btrfs_root *root,
5326                                struct cache_tree *extent_cache,
5327                                struct extent_buffer *eb, int slot)
5328 {
5329         struct btrfs_extent_item *ei;
5330         struct btrfs_extent_inline_ref *iref;
5331         struct btrfs_extent_data_ref *dref;
5332         struct btrfs_shared_data_ref *sref;
5333         struct btrfs_key key;
5334         struct extent_record tmpl;
5335         unsigned long end;
5336         unsigned long ptr;
5337         int type;
5338         u32 item_size = btrfs_item_size_nr(eb, slot);
5339         u64 refs = 0;
5340         u64 offset;
5341         u64 num_bytes;
5342         int metadata = 0;
5343
5344         btrfs_item_key_to_cpu(eb, &key, slot);
5345
5346         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5347                 metadata = 1;
5348                 num_bytes = root->nodesize;
5349         } else {
5350                 num_bytes = key.offset;
5351         }
5352
5353         if (item_size < sizeof(*ei)) {
5354 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5355                 struct btrfs_extent_item_v0 *ei0;
5356                 BUG_ON(item_size != sizeof(*ei0));
5357                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5358                 refs = btrfs_extent_refs_v0(eb, ei0);
5359 #else
5360                 BUG();
5361 #endif
5362                 memset(&tmpl, 0, sizeof(tmpl));
5363                 tmpl.start = key.objectid;
5364                 tmpl.nr = num_bytes;
5365                 tmpl.extent_item_refs = refs;
5366                 tmpl.metadata = metadata;
5367                 tmpl.found_rec = 1;
5368                 tmpl.max_size = num_bytes;
5369
5370                 return add_extent_rec(extent_cache, &tmpl);
5371         }
5372
5373         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5374         refs = btrfs_extent_refs(eb, ei);
5375         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5376                 metadata = 1;
5377         else
5378                 metadata = 0;
5379
5380         memset(&tmpl, 0, sizeof(tmpl));
5381         tmpl.start = key.objectid;
5382         tmpl.nr = num_bytes;
5383         tmpl.extent_item_refs = refs;
5384         tmpl.metadata = metadata;
5385         tmpl.found_rec = 1;
5386         tmpl.max_size = num_bytes;
5387         add_extent_rec(extent_cache, &tmpl);
5388
5389         ptr = (unsigned long)(ei + 1);
5390         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5391             key.type == BTRFS_EXTENT_ITEM_KEY)
5392                 ptr += sizeof(struct btrfs_tree_block_info);
5393
5394         end = (unsigned long)ei + item_size;
5395         while (ptr < end) {
5396                 iref = (struct btrfs_extent_inline_ref *)ptr;
5397                 type = btrfs_extent_inline_ref_type(eb, iref);
5398                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5399                 switch (type) {
5400                 case BTRFS_TREE_BLOCK_REF_KEY:
5401                         add_tree_backref(extent_cache, key.objectid,
5402                                          0, offset, 0);
5403                         break;
5404                 case BTRFS_SHARED_BLOCK_REF_KEY:
5405                         add_tree_backref(extent_cache, key.objectid,
5406                                          offset, 0, 0);
5407                         break;
5408                 case BTRFS_EXTENT_DATA_REF_KEY:
5409                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5410                         add_data_backref(extent_cache, key.objectid, 0,
5411                                         btrfs_extent_data_ref_root(eb, dref),
5412                                         btrfs_extent_data_ref_objectid(eb,
5413                                                                        dref),
5414                                         btrfs_extent_data_ref_offset(eb, dref),
5415                                         btrfs_extent_data_ref_count(eb, dref),
5416                                         0, num_bytes);
5417                         break;
5418                 case BTRFS_SHARED_DATA_REF_KEY:
5419                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5420                         add_data_backref(extent_cache, key.objectid, offset,
5421                                         0, 0, 0,
5422                                         btrfs_shared_data_ref_count(eb, sref),
5423                                         0, num_bytes);
5424                         break;
5425                 default:
5426                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5427                                 key.objectid, key.type, num_bytes);
5428                         goto out;
5429                 }
5430                 ptr += btrfs_extent_inline_ref_size(type);
5431         }
5432         WARN_ON(ptr > end);
5433 out:
5434         return 0;
5435 }
5436
5437 static int check_cache_range(struct btrfs_root *root,
5438                              struct btrfs_block_group_cache *cache,
5439                              u64 offset, u64 bytes)
5440 {
5441         struct btrfs_free_space *entry;
5442         u64 *logical;
5443         u64 bytenr;
5444         int stripe_len;
5445         int i, nr, ret;
5446
5447         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5448                 bytenr = btrfs_sb_offset(i);
5449                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5450                                        cache->key.objectid, bytenr, 0,
5451                                        &logical, &nr, &stripe_len);
5452                 if (ret)
5453                         return ret;
5454
5455                 while (nr--) {
5456                         if (logical[nr] + stripe_len <= offset)
5457                                 continue;
5458                         if (offset + bytes <= logical[nr])
5459                                 continue;
5460                         if (logical[nr] == offset) {
5461                                 if (stripe_len >= bytes) {
5462                                         kfree(logical);
5463                                         return 0;
5464                                 }
5465                                 bytes -= stripe_len;
5466                                 offset += stripe_len;
5467                         } else if (logical[nr] < offset) {
5468                                 if (logical[nr] + stripe_len >=
5469                                     offset + bytes) {
5470                                         kfree(logical);
5471                                         return 0;
5472                                 }
5473                                 bytes = (offset + bytes) -
5474                                         (logical[nr] + stripe_len);
5475                                 offset = logical[nr] + stripe_len;
5476                         } else {
5477                                 /*
5478                                  * Could be tricky, the super may land in the
5479                                  * middle of the area we're checking.  First
5480                                  * check the easiest case, it's at the end.
5481                                  */
5482                                 if (logical[nr] + stripe_len >=
5483                                     bytes + offset) {
5484                                         bytes = logical[nr] - offset;
5485                                         continue;
5486                                 }
5487
5488                                 /* Check the left side */
5489                                 ret = check_cache_range(root, cache,
5490                                                         offset,
5491                                                         logical[nr] - offset);
5492                                 if (ret) {
5493                                         kfree(logical);
5494                                         return ret;
5495                                 }
5496
5497                                 /* Now we continue with the right side */
5498                                 bytes = (offset + bytes) -
5499                                         (logical[nr] + stripe_len);
5500                                 offset = logical[nr] + stripe_len;
5501                         }
5502                 }
5503
5504                 kfree(logical);
5505         }
5506
5507         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5508         if (!entry) {
5509                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5510                         offset, offset+bytes);
5511                 return -EINVAL;
5512         }
5513
5514         if (entry->offset != offset) {
5515                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5516                         entry->offset);
5517                 return -EINVAL;
5518         }
5519
5520         if (entry->bytes != bytes) {
5521                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5522                         bytes, entry->bytes, offset);
5523                 return -EINVAL;
5524         }
5525
5526         unlink_free_space(cache->free_space_ctl, entry);
5527         free(entry);
5528         return 0;
5529 }
5530
5531 static int verify_space_cache(struct btrfs_root *root,
5532                               struct btrfs_block_group_cache *cache)
5533 {
5534         struct btrfs_path *path;
5535         struct extent_buffer *leaf;
5536         struct btrfs_key key;
5537         u64 last;
5538         int ret = 0;
5539
5540         path = btrfs_alloc_path();
5541         if (!path)
5542                 return -ENOMEM;
5543
5544         root = root->fs_info->extent_root;
5545
5546         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5547
5548         key.objectid = last;
5549         key.offset = 0;
5550         key.type = BTRFS_EXTENT_ITEM_KEY;
5551
5552         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5553         if (ret < 0)
5554                 goto out;
5555         ret = 0;
5556         while (1) {
5557                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5558                         ret = btrfs_next_leaf(root, path);
5559                         if (ret < 0)
5560                                 goto out;
5561                         if (ret > 0) {
5562                                 ret = 0;
5563                                 break;
5564                         }
5565                 }
5566                 leaf = path->nodes[0];
5567                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5568                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5569                         break;
5570                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5571                     key.type != BTRFS_METADATA_ITEM_KEY) {
5572                         path->slots[0]++;
5573                         continue;
5574                 }
5575
5576                 if (last == key.objectid) {
5577                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5578                                 last = key.objectid + key.offset;
5579                         else
5580                                 last = key.objectid + root->nodesize;
5581                         path->slots[0]++;
5582                         continue;
5583                 }
5584
5585                 ret = check_cache_range(root, cache, last,
5586                                         key.objectid - last);
5587                 if (ret)
5588                         break;
5589                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5590                         last = key.objectid + key.offset;
5591                 else
5592                         last = key.objectid + root->nodesize;
5593                 path->slots[0]++;
5594         }
5595
5596         if (last < cache->key.objectid + cache->key.offset)
5597                 ret = check_cache_range(root, cache, last,
5598                                         cache->key.objectid +
5599                                         cache->key.offset - last);
5600
5601 out:
5602         btrfs_free_path(path);
5603
5604         if (!ret &&
5605             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5606                 fprintf(stderr, "There are still entries left in the space "
5607                         "cache\n");
5608                 ret = -EINVAL;
5609         }
5610
5611         return ret;
5612 }
5613
5614 static int check_space_cache(struct btrfs_root *root)
5615 {
5616         struct btrfs_block_group_cache *cache;
5617         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5618         int ret;
5619         int error = 0;
5620
5621         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5622             btrfs_super_generation(root->fs_info->super_copy) !=
5623             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5624                 printf("cache and super generation don't match, space cache "
5625                        "will be invalidated\n");
5626                 return 0;
5627         }
5628
5629         if (ctx.progress_enabled) {
5630                 ctx.tp = TASK_FREE_SPACE;
5631                 task_start(ctx.info);
5632         }
5633
5634         while (1) {
5635                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5636                 if (!cache)
5637                         break;
5638
5639                 start = cache->key.objectid + cache->key.offset;
5640                 if (!cache->free_space_ctl) {
5641                         if (btrfs_init_free_space_ctl(cache,
5642                                                       root->sectorsize)) {
5643                                 ret = -ENOMEM;
5644                                 break;
5645                         }
5646                 } else {
5647                         btrfs_remove_free_space_cache(cache);
5648                 }
5649
5650                 if (btrfs_fs_compat_ro(root->fs_info,
5651                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5652                         ret = exclude_super_stripes(root, cache);
5653                         if (ret) {
5654                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5655                                         strerror(-ret));
5656                                 error++;
5657                                 continue;
5658                         }
5659                         ret = load_free_space_tree(root->fs_info, cache);
5660                         free_excluded_extents(root, cache);
5661                         if (ret < 0) {
5662                                 fprintf(stderr, "could not load free space tree: %s\n",
5663                                         strerror(-ret));
5664                                 error++;
5665                                 continue;
5666                         }
5667                         error += ret;
5668                 } else {
5669                         ret = load_free_space_cache(root->fs_info, cache);
5670                         if (!ret)
5671                                 continue;
5672                 }
5673
5674                 ret = verify_space_cache(root, cache);
5675                 if (ret) {
5676                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5677                                 cache->key.objectid);
5678                         error++;
5679                 }
5680         }
5681
5682         task_stop(ctx.info);
5683
5684         return error ? -EINVAL : 0;
5685 }
5686
5687 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5688                         u64 num_bytes, unsigned long leaf_offset,
5689                         struct extent_buffer *eb) {
5690
5691         u64 offset = 0;
5692         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5693         char *data;
5694         unsigned long csum_offset;
5695         u32 csum;
5696         u32 csum_expected;
5697         u64 read_len;
5698         u64 data_checked = 0;
5699         u64 tmp;
5700         int ret = 0;
5701         int mirror;
5702         int num_copies;
5703
5704         if (num_bytes % root->sectorsize)
5705                 return -EINVAL;
5706
5707         data = malloc(num_bytes);
5708         if (!data)
5709                 return -ENOMEM;
5710
5711         while (offset < num_bytes) {
5712                 mirror = 0;
5713 again:
5714                 read_len = num_bytes - offset;
5715                 /* read as much space once a time */
5716                 ret = read_extent_data(root, data + offset,
5717                                 bytenr + offset, &read_len, mirror);
5718                 if (ret)
5719                         goto out;
5720                 data_checked = 0;
5721                 /* verify every 4k data's checksum */
5722                 while (data_checked < read_len) {
5723                         csum = ~(u32)0;
5724                         tmp = offset + data_checked;
5725
5726                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5727                                                csum, root->sectorsize);
5728                         btrfs_csum_final(csum, (char *)&csum);
5729
5730                         csum_offset = leaf_offset +
5731                                  tmp / root->sectorsize * csum_size;
5732                         read_extent_buffer(eb, (char *)&csum_expected,
5733                                            csum_offset, csum_size);
5734                         /* try another mirror */
5735                         if (csum != csum_expected) {
5736                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5737                                                 mirror, bytenr + tmp,
5738                                                 csum, csum_expected);
5739                                 num_copies = btrfs_num_copies(
5740                                                 &root->fs_info->mapping_tree,
5741                                                 bytenr, num_bytes);
5742                                 if (mirror < num_copies - 1) {
5743                                         mirror += 1;
5744                                         goto again;
5745                                 }
5746                         }
5747                         data_checked += root->sectorsize;
5748                 }
5749                 offset += read_len;
5750         }
5751 out:
5752         free(data);
5753         return ret;
5754 }
5755
5756 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5757                                u64 num_bytes)
5758 {
5759         struct btrfs_path *path;
5760         struct extent_buffer *leaf;
5761         struct btrfs_key key;
5762         int ret;
5763
5764         path = btrfs_alloc_path();
5765         if (!path) {
5766                 fprintf(stderr, "Error allocating path\n");
5767                 return -ENOMEM;
5768         }
5769
5770         key.objectid = bytenr;
5771         key.type = BTRFS_EXTENT_ITEM_KEY;
5772         key.offset = (u64)-1;
5773
5774 again:
5775         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5776                                 0, 0);
5777         if (ret < 0) {
5778                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5779                 btrfs_free_path(path);
5780                 return ret;
5781         } else if (ret) {
5782                 if (path->slots[0] > 0) {
5783                         path->slots[0]--;
5784                 } else {
5785                         ret = btrfs_prev_leaf(root, path);
5786                         if (ret < 0) {
5787                                 goto out;
5788                         } else if (ret > 0) {
5789                                 ret = 0;
5790                                 goto out;
5791                         }
5792                 }
5793         }
5794
5795         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5796
5797         /*
5798          * Block group items come before extent items if they have the same
5799          * bytenr, so walk back one more just in case.  Dear future traveller,
5800          * first congrats on mastering time travel.  Now if it's not too much
5801          * trouble could you go back to 2006 and tell Chris to make the
5802          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5803          * EXTENT_ITEM_KEY please?
5804          */
5805         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5806                 if (path->slots[0] > 0) {
5807                         path->slots[0]--;
5808                 } else {
5809                         ret = btrfs_prev_leaf(root, path);
5810                         if (ret < 0) {
5811                                 goto out;
5812                         } else if (ret > 0) {
5813                                 ret = 0;
5814                                 goto out;
5815                         }
5816                 }
5817                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5818         }
5819
5820         while (num_bytes) {
5821                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5822                         ret = btrfs_next_leaf(root, path);
5823                         if (ret < 0) {
5824                                 fprintf(stderr, "Error going to next leaf "
5825                                         "%d\n", ret);
5826                                 btrfs_free_path(path);
5827                                 return ret;
5828                         } else if (ret) {
5829                                 break;
5830                         }
5831                 }
5832                 leaf = path->nodes[0];
5833                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5834                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5835                         path->slots[0]++;
5836                         continue;
5837                 }
5838                 if (key.objectid + key.offset < bytenr) {
5839                         path->slots[0]++;
5840                         continue;
5841                 }
5842                 if (key.objectid > bytenr + num_bytes)
5843                         break;
5844
5845                 if (key.objectid == bytenr) {
5846                         if (key.offset >= num_bytes) {
5847                                 num_bytes = 0;
5848                                 break;
5849                         }
5850                         num_bytes -= key.offset;
5851                         bytenr += key.offset;
5852                 } else if (key.objectid < bytenr) {
5853                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5854                                 num_bytes = 0;
5855                                 break;
5856                         }
5857                         num_bytes = (bytenr + num_bytes) -
5858                                 (key.objectid + key.offset);
5859                         bytenr = key.objectid + key.offset;
5860                 } else {
5861                         if (key.objectid + key.offset < bytenr + num_bytes) {
5862                                 u64 new_start = key.objectid + key.offset;
5863                                 u64 new_bytes = bytenr + num_bytes - new_start;
5864
5865                                 /*
5866                                  * Weird case, the extent is in the middle of
5867                                  * our range, we'll have to search one side
5868                                  * and then the other.  Not sure if this happens
5869                                  * in real life, but no harm in coding it up
5870                                  * anyway just in case.
5871                                  */
5872                                 btrfs_release_path(path);
5873                                 ret = check_extent_exists(root, new_start,
5874                                                           new_bytes);
5875                                 if (ret) {
5876                                         fprintf(stderr, "Right section didn't "
5877                                                 "have a record\n");
5878                                         break;
5879                                 }
5880                                 num_bytes = key.objectid - bytenr;
5881                                 goto again;
5882                         }
5883                         num_bytes = key.objectid - bytenr;
5884                 }
5885                 path->slots[0]++;
5886         }
5887         ret = 0;
5888
5889 out:
5890         if (num_bytes && !ret) {
5891                 fprintf(stderr, "There are no extents for csum range "
5892                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5893                 ret = 1;
5894         }
5895
5896         btrfs_free_path(path);
5897         return ret;
5898 }
5899
5900 static int check_csums(struct btrfs_root *root)
5901 {
5902         struct btrfs_path *path;
5903         struct extent_buffer *leaf;
5904         struct btrfs_key key;
5905         u64 offset = 0, num_bytes = 0;
5906         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5907         int errors = 0;
5908         int ret;
5909         u64 data_len;
5910         unsigned long leaf_offset;
5911
5912         root = root->fs_info->csum_root;
5913         if (!extent_buffer_uptodate(root->node)) {
5914                 fprintf(stderr, "No valid csum tree found\n");
5915                 return -ENOENT;
5916         }
5917
5918         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5919         key.type = BTRFS_EXTENT_CSUM_KEY;
5920         key.offset = 0;
5921
5922         path = btrfs_alloc_path();
5923         if (!path)
5924                 return -ENOMEM;
5925
5926         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5927         if (ret < 0) {
5928                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5929                 btrfs_free_path(path);
5930                 return ret;
5931         }
5932
5933         if (ret > 0 && path->slots[0])
5934                 path->slots[0]--;
5935         ret = 0;
5936
5937         while (1) {
5938                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5939                         ret = btrfs_next_leaf(root, path);
5940                         if (ret < 0) {
5941                                 fprintf(stderr, "Error going to next leaf "
5942                                         "%d\n", ret);
5943                                 break;
5944                         }
5945                         if (ret)
5946                                 break;
5947                 }
5948                 leaf = path->nodes[0];
5949
5950                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5951                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5952                         path->slots[0]++;
5953                         continue;
5954                 }
5955
5956                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5957                               csum_size) * root->sectorsize;
5958                 if (!check_data_csum)
5959                         goto skip_csum_check;
5960                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5961                 ret = check_extent_csums(root, key.offset, data_len,
5962                                          leaf_offset, leaf);
5963                 if (ret)
5964                         break;
5965 skip_csum_check:
5966                 if (!num_bytes) {
5967                         offset = key.offset;
5968                 } else if (key.offset != offset + num_bytes) {
5969                         ret = check_extent_exists(root, offset, num_bytes);
5970                         if (ret) {
5971                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5972                                         "there is no extent record\n",
5973                                         offset, offset+num_bytes);
5974                                 errors++;
5975                         }
5976                         offset = key.offset;
5977                         num_bytes = 0;
5978                 }
5979                 num_bytes += data_len;
5980                 path->slots[0]++;
5981         }
5982
5983         btrfs_free_path(path);
5984         return errors;
5985 }
5986
5987 static int is_dropped_key(struct btrfs_key *key,
5988                           struct btrfs_key *drop_key) {
5989         if (key->objectid < drop_key->objectid)
5990                 return 1;
5991         else if (key->objectid == drop_key->objectid) {
5992                 if (key->type < drop_key->type)
5993                         return 1;
5994                 else if (key->type == drop_key->type) {
5995                         if (key->offset < drop_key->offset)
5996                                 return 1;
5997                 }
5998         }
5999         return 0;
6000 }
6001
6002 /*
6003  * Here are the rules for FULL_BACKREF.
6004  *
6005  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6006  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6007  *      FULL_BACKREF set.
6008  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
6009  *    if it happened after the relocation occurred since we'll have dropped the
6010  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6011  *    have no real way to know for sure.
6012  *
6013  * We process the blocks one root at a time, and we start from the lowest root
6014  * objectid and go to the highest.  So we can just lookup the owner backref for
6015  * the record and if we don't find it then we know it doesn't exist and we have
6016  * a FULL BACKREF.
6017  *
6018  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6019  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6020  * be set or not and then we can check later once we've gathered all the refs.
6021  */
6022 static int calc_extent_flag(struct btrfs_root *root,
6023                            struct cache_tree *extent_cache,
6024                            struct extent_buffer *buf,
6025                            struct root_item_record *ri,
6026                            u64 *flags)
6027 {
6028         struct extent_record *rec;
6029         struct cache_extent *cache;
6030         struct tree_backref *tback;
6031         u64 owner = 0;
6032
6033         cache = lookup_cache_extent(extent_cache, buf->start, 1);
6034         /* we have added this extent before */
6035         BUG_ON(!cache);
6036         rec = container_of(cache, struct extent_record, cache);
6037
6038         /*
6039          * Except file/reloc tree, we can not have
6040          * FULL BACKREF MODE
6041          */
6042         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6043                 goto normal;
6044         /*
6045          * root node
6046          */
6047         if (buf->start == ri->bytenr)
6048                 goto normal;
6049
6050         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6051                 goto full_backref;
6052
6053         owner = btrfs_header_owner(buf);
6054         if (owner == ri->objectid)
6055                 goto normal;
6056
6057         tback = find_tree_backref(rec, 0, owner);
6058         if (!tback)
6059                 goto full_backref;
6060 normal:
6061         *flags = 0;
6062         if (rec->flag_block_full_backref != FLAG_UNSET &&
6063             rec->flag_block_full_backref != 0)
6064                 rec->bad_full_backref = 1;
6065         return 0;
6066 full_backref:
6067         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6068         if (rec->flag_block_full_backref != FLAG_UNSET &&
6069             rec->flag_block_full_backref != 1)
6070                 rec->bad_full_backref = 1;
6071         return 0;
6072 }
6073
6074 static int run_next_block(struct btrfs_root *root,
6075                           struct block_info *bits,
6076                           int bits_nr,
6077                           u64 *last,
6078                           struct cache_tree *pending,
6079                           struct cache_tree *seen,
6080                           struct cache_tree *reada,
6081                           struct cache_tree *nodes,
6082                           struct cache_tree *extent_cache,
6083                           struct cache_tree *chunk_cache,
6084                           struct rb_root *dev_cache,
6085                           struct block_group_tree *block_group_cache,
6086                           struct device_extent_tree *dev_extent_cache,
6087                           struct root_item_record *ri)
6088 {
6089         struct extent_buffer *buf;
6090         struct extent_record *rec = NULL;
6091         u64 bytenr;
6092         u32 size;
6093         u64 parent;
6094         u64 owner;
6095         u64 flags;
6096         u64 ptr;
6097         u64 gen = 0;
6098         int ret = 0;
6099         int i;
6100         int nritems;
6101         struct btrfs_key key;
6102         struct cache_extent *cache;
6103         int reada_bits;
6104
6105         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6106                                     bits_nr, &reada_bits);
6107         if (nritems == 0)
6108                 return 1;
6109
6110         if (!reada_bits) {
6111                 for(i = 0; i < nritems; i++) {
6112                         ret = add_cache_extent(reada, bits[i].start,
6113                                                bits[i].size);
6114                         if (ret == -EEXIST)
6115                                 continue;
6116
6117                         /* fixme, get the parent transid */
6118                         readahead_tree_block(root, bits[i].start,
6119                                              bits[i].size, 0);
6120                 }
6121         }
6122         *last = bits[0].start;
6123         bytenr = bits[0].start;
6124         size = bits[0].size;
6125
6126         cache = lookup_cache_extent(pending, bytenr, size);
6127         if (cache) {
6128                 remove_cache_extent(pending, cache);
6129                 free(cache);
6130         }
6131         cache = lookup_cache_extent(reada, bytenr, size);
6132         if (cache) {
6133                 remove_cache_extent(reada, cache);
6134                 free(cache);
6135         }
6136         cache = lookup_cache_extent(nodes, bytenr, size);
6137         if (cache) {
6138                 remove_cache_extent(nodes, cache);
6139                 free(cache);
6140         }
6141         cache = lookup_cache_extent(extent_cache, bytenr, size);
6142         if (cache) {
6143                 rec = container_of(cache, struct extent_record, cache);
6144                 gen = rec->parent_generation;
6145         }
6146
6147         /* fixme, get the real parent transid */
6148         buf = read_tree_block(root, bytenr, size, gen);
6149         if (!extent_buffer_uptodate(buf)) {
6150                 record_bad_block_io(root->fs_info,
6151                                     extent_cache, bytenr, size);
6152                 goto out;
6153         }
6154
6155         nritems = btrfs_header_nritems(buf);
6156
6157         flags = 0;
6158         if (!init_extent_tree) {
6159                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6160                                        btrfs_header_level(buf), 1, NULL,
6161                                        &flags);
6162                 if (ret < 0) {
6163                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6164                         if (ret < 0) {
6165                                 fprintf(stderr, "Couldn't calc extent flags\n");
6166                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6167                         }
6168                 }
6169         } else {
6170                 flags = 0;
6171                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6172                 if (ret < 0) {
6173                         fprintf(stderr, "Couldn't calc extent flags\n");
6174                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6175                 }
6176         }
6177
6178         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6179                 if (ri != NULL &&
6180                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6181                     ri->objectid == btrfs_header_owner(buf)) {
6182                         /*
6183                          * Ok we got to this block from it's original owner and
6184                          * we have FULL_BACKREF set.  Relocation can leave
6185                          * converted blocks over so this is altogether possible,
6186                          * however it's not possible if the generation > the
6187                          * last snapshot, so check for this case.
6188                          */
6189                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6190                             btrfs_header_generation(buf) > ri->last_snapshot) {
6191                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6192                                 rec->bad_full_backref = 1;
6193                         }
6194                 }
6195         } else {
6196                 if (ri != NULL &&
6197                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6198                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6199                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6200                         rec->bad_full_backref = 1;
6201                 }
6202         }
6203
6204         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6205                 rec->flag_block_full_backref = 1;
6206                 parent = bytenr;
6207                 owner = 0;
6208         } else {
6209                 rec->flag_block_full_backref = 0;
6210                 parent = 0;
6211                 owner = btrfs_header_owner(buf);
6212         }
6213
6214         ret = check_block(root, extent_cache, buf, flags);
6215         if (ret)
6216                 goto out;
6217
6218         if (btrfs_is_leaf(buf)) {
6219                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6220                 for (i = 0; i < nritems; i++) {
6221                         struct btrfs_file_extent_item *fi;
6222                         btrfs_item_key_to_cpu(buf, &key, i);
6223                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6224                                 process_extent_item(root, extent_cache, buf,
6225                                                     i);
6226                                 continue;
6227                         }
6228                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6229                                 process_extent_item(root, extent_cache, buf,
6230                                                     i);
6231                                 continue;
6232                         }
6233                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6234                                 total_csum_bytes +=
6235                                         btrfs_item_size_nr(buf, i);
6236                                 continue;
6237                         }
6238                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6239                                 process_chunk_item(chunk_cache, &key, buf, i);
6240                                 continue;
6241                         }
6242                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6243                                 process_device_item(dev_cache, &key, buf, i);
6244                                 continue;
6245                         }
6246                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6247                                 process_block_group_item(block_group_cache,
6248                                         &key, buf, i);
6249                                 continue;
6250                         }
6251                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6252                                 process_device_extent_item(dev_extent_cache,
6253                                         &key, buf, i);
6254                                 continue;
6255
6256                         }
6257                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6258 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6259                                 process_extent_ref_v0(extent_cache, buf, i);
6260 #else
6261                                 BUG();
6262 #endif
6263                                 continue;
6264                         }
6265
6266                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6267                                 add_tree_backref(extent_cache, key.objectid, 0,
6268                                                  key.offset, 0);
6269                                 continue;
6270                         }
6271                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6272                                 add_tree_backref(extent_cache, key.objectid,
6273                                                  key.offset, 0, 0);
6274                                 continue;
6275                         }
6276                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6277                                 struct btrfs_extent_data_ref *ref;
6278                                 ref = btrfs_item_ptr(buf, i,
6279                                                 struct btrfs_extent_data_ref);
6280                                 add_data_backref(extent_cache,
6281                                         key.objectid, 0,
6282                                         btrfs_extent_data_ref_root(buf, ref),
6283                                         btrfs_extent_data_ref_objectid(buf,
6284                                                                        ref),
6285                                         btrfs_extent_data_ref_offset(buf, ref),
6286                                         btrfs_extent_data_ref_count(buf, ref),
6287                                         0, root->sectorsize);
6288                                 continue;
6289                         }
6290                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6291                                 struct btrfs_shared_data_ref *ref;
6292                                 ref = btrfs_item_ptr(buf, i,
6293                                                 struct btrfs_shared_data_ref);
6294                                 add_data_backref(extent_cache,
6295                                         key.objectid, key.offset, 0, 0, 0,
6296                                         btrfs_shared_data_ref_count(buf, ref),
6297                                         0, root->sectorsize);
6298                                 continue;
6299                         }
6300                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6301                                 struct bad_item *bad;
6302
6303                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6304                                         continue;
6305                                 if (!owner)
6306                                         continue;
6307                                 bad = malloc(sizeof(struct bad_item));
6308                                 if (!bad)
6309                                         continue;
6310                                 INIT_LIST_HEAD(&bad->list);
6311                                 memcpy(&bad->key, &key,
6312                                        sizeof(struct btrfs_key));
6313                                 bad->root_id = owner;
6314                                 list_add_tail(&bad->list, &delete_items);
6315                                 continue;
6316                         }
6317                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6318                                 continue;
6319                         fi = btrfs_item_ptr(buf, i,
6320                                             struct btrfs_file_extent_item);
6321                         if (btrfs_file_extent_type(buf, fi) ==
6322                             BTRFS_FILE_EXTENT_INLINE)
6323                                 continue;
6324                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6325                                 continue;
6326
6327                         data_bytes_allocated +=
6328                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6329                         if (data_bytes_allocated < root->sectorsize) {
6330                                 abort();
6331                         }
6332                         data_bytes_referenced +=
6333                                 btrfs_file_extent_num_bytes(buf, fi);
6334                         add_data_backref(extent_cache,
6335                                 btrfs_file_extent_disk_bytenr(buf, fi),
6336                                 parent, owner, key.objectid, key.offset -
6337                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6338                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6339                 }
6340         } else {
6341                 int level;
6342                 struct btrfs_key first_key;
6343
6344                 first_key.objectid = 0;
6345
6346                 if (nritems > 0)
6347                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6348                 level = btrfs_header_level(buf);
6349                 for (i = 0; i < nritems; i++) {
6350                         struct extent_record tmpl;
6351
6352                         ptr = btrfs_node_blockptr(buf, i);
6353                         size = root->nodesize;
6354                         btrfs_node_key_to_cpu(buf, &key, i);
6355                         if (ri != NULL) {
6356                                 if ((level == ri->drop_level)
6357                                     && is_dropped_key(&key, &ri->drop_key)) {
6358                                         continue;
6359                                 }
6360                         }
6361
6362                         memset(&tmpl, 0, sizeof(tmpl));
6363                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6364                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6365                         tmpl.start = ptr;
6366                         tmpl.nr = size;
6367                         tmpl.refs = 1;
6368                         tmpl.metadata = 1;
6369                         tmpl.max_size = size;
6370                         ret = add_extent_rec(extent_cache, &tmpl);
6371                         BUG_ON(ret);
6372
6373                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6374
6375                         if (level > 1) {
6376                                 add_pending(nodes, seen, ptr, size);
6377                         } else {
6378                                 add_pending(pending, seen, ptr, size);
6379                         }
6380                 }
6381                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6382                                       nritems) * sizeof(struct btrfs_key_ptr);
6383         }
6384         total_btree_bytes += buf->len;
6385         if (fs_root_objectid(btrfs_header_owner(buf)))
6386                 total_fs_tree_bytes += buf->len;
6387         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6388                 total_extent_tree_bytes += buf->len;
6389         if (!found_old_backref &&
6390             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6391             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6392             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6393                 found_old_backref = 1;
6394 out:
6395         free_extent_buffer(buf);
6396         return ret;
6397 }
6398
6399 static int add_root_to_pending(struct extent_buffer *buf,
6400                                struct cache_tree *extent_cache,
6401                                struct cache_tree *pending,
6402                                struct cache_tree *seen,
6403                                struct cache_tree *nodes,
6404                                u64 objectid)
6405 {
6406         struct extent_record tmpl;
6407
6408         if (btrfs_header_level(buf) > 0)
6409                 add_pending(nodes, seen, buf->start, buf->len);
6410         else
6411                 add_pending(pending, seen, buf->start, buf->len);
6412
6413         memset(&tmpl, 0, sizeof(tmpl));
6414         tmpl.start = buf->start;
6415         tmpl.nr = buf->len;
6416         tmpl.is_root = 1;
6417         tmpl.refs = 1;
6418         tmpl.metadata = 1;
6419         tmpl.max_size = buf->len;
6420         add_extent_rec(extent_cache, &tmpl);
6421
6422         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6423             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6424                 add_tree_backref(extent_cache, buf->start, buf->start,
6425                                  0, 1);
6426         else
6427                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6428         return 0;
6429 }
6430
6431 /* as we fix the tree, we might be deleting blocks that
6432  * we're tracking for repair.  This hook makes sure we
6433  * remove any backrefs for blocks as we are fixing them.
6434  */
6435 static int free_extent_hook(struct btrfs_trans_handle *trans,
6436                             struct btrfs_root *root,
6437                             u64 bytenr, u64 num_bytes, u64 parent,
6438                             u64 root_objectid, u64 owner, u64 offset,
6439                             int refs_to_drop)
6440 {
6441         struct extent_record *rec;
6442         struct cache_extent *cache;
6443         int is_data;
6444         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6445
6446         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6447         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6448         if (!cache)
6449                 return 0;
6450
6451         rec = container_of(cache, struct extent_record, cache);
6452         if (is_data) {
6453                 struct data_backref *back;
6454                 back = find_data_backref(rec, parent, root_objectid, owner,
6455                                          offset, 1, bytenr, num_bytes);
6456                 if (!back)
6457                         goto out;
6458                 if (back->node.found_ref) {
6459                         back->found_ref -= refs_to_drop;
6460                         if (rec->refs)
6461                                 rec->refs -= refs_to_drop;
6462                 }
6463                 if (back->node.found_extent_tree) {
6464                         back->num_refs -= refs_to_drop;
6465                         if (rec->extent_item_refs)
6466                                 rec->extent_item_refs -= refs_to_drop;
6467                 }
6468                 if (back->found_ref == 0)
6469                         back->node.found_ref = 0;
6470                 if (back->num_refs == 0)
6471                         back->node.found_extent_tree = 0;
6472
6473                 if (!back->node.found_extent_tree && back->node.found_ref) {
6474                         list_del(&back->node.list);
6475                         free(back);
6476                 }
6477         } else {
6478                 struct tree_backref *back;
6479                 back = find_tree_backref(rec, parent, root_objectid);
6480                 if (!back)
6481                         goto out;
6482                 if (back->node.found_ref) {
6483                         if (rec->refs)
6484                                 rec->refs--;
6485                         back->node.found_ref = 0;
6486                 }
6487                 if (back->node.found_extent_tree) {
6488                         if (rec->extent_item_refs)
6489                                 rec->extent_item_refs--;
6490                         back->node.found_extent_tree = 0;
6491                 }
6492                 if (!back->node.found_extent_tree && back->node.found_ref) {
6493                         list_del(&back->node.list);
6494                         free(back);
6495                 }
6496         }
6497         maybe_free_extent_rec(extent_cache, rec);
6498 out:
6499         return 0;
6500 }
6501
6502 static int delete_extent_records(struct btrfs_trans_handle *trans,
6503                                  struct btrfs_root *root,
6504                                  struct btrfs_path *path,
6505                                  u64 bytenr, u64 new_len)
6506 {
6507         struct btrfs_key key;
6508         struct btrfs_key found_key;
6509         struct extent_buffer *leaf;
6510         int ret;
6511         int slot;
6512
6513
6514         key.objectid = bytenr;
6515         key.type = (u8)-1;
6516         key.offset = (u64)-1;
6517
6518         while(1) {
6519                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6520                                         &key, path, 0, 1);
6521                 if (ret < 0)
6522                         break;
6523
6524                 if (ret > 0) {
6525                         ret = 0;
6526                         if (path->slots[0] == 0)
6527                                 break;
6528                         path->slots[0]--;
6529                 }
6530                 ret = 0;
6531
6532                 leaf = path->nodes[0];
6533                 slot = path->slots[0];
6534
6535                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6536                 if (found_key.objectid != bytenr)
6537                         break;
6538
6539                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6540                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6541                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6542                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6543                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6544                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6545                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6546                         btrfs_release_path(path);
6547                         if (found_key.type == 0) {
6548                                 if (found_key.offset == 0)
6549                                         break;
6550                                 key.offset = found_key.offset - 1;
6551                                 key.type = found_key.type;
6552                         }
6553                         key.type = found_key.type - 1;
6554                         key.offset = (u64)-1;
6555                         continue;
6556                 }
6557
6558                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6559                         found_key.objectid, found_key.type, found_key.offset);
6560
6561                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6562                 if (ret)
6563                         break;
6564                 btrfs_release_path(path);
6565
6566                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6567                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6568                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6569                                 found_key.offset : root->nodesize;
6570
6571                         ret = btrfs_update_block_group(trans, root, bytenr,
6572                                                        bytes, 0, 0);
6573                         if (ret)
6574                                 break;
6575                 }
6576         }
6577
6578         btrfs_release_path(path);
6579         return ret;
6580 }
6581
6582 /*
6583  * for a single backref, this will allocate a new extent
6584  * and add the backref to it.
6585  */
6586 static int record_extent(struct btrfs_trans_handle *trans,
6587                          struct btrfs_fs_info *info,
6588                          struct btrfs_path *path,
6589                          struct extent_record *rec,
6590                          struct extent_backref *back,
6591                          int allocated, u64 flags)
6592 {
6593         int ret;
6594         struct btrfs_root *extent_root = info->extent_root;
6595         struct extent_buffer *leaf;
6596         struct btrfs_key ins_key;
6597         struct btrfs_extent_item *ei;
6598         struct tree_backref *tback;
6599         struct data_backref *dback;
6600         struct btrfs_tree_block_info *bi;
6601
6602         if (!back->is_data)
6603                 rec->max_size = max_t(u64, rec->max_size,
6604                                     info->extent_root->nodesize);
6605
6606         if (!allocated) {
6607                 u32 item_size = sizeof(*ei);
6608
6609                 if (!back->is_data)
6610                         item_size += sizeof(*bi);
6611
6612                 ins_key.objectid = rec->start;
6613                 ins_key.offset = rec->max_size;
6614                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6615
6616                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6617                                         &ins_key, item_size);
6618                 if (ret)
6619                         goto fail;
6620
6621                 leaf = path->nodes[0];
6622                 ei = btrfs_item_ptr(leaf, path->slots[0],
6623                                     struct btrfs_extent_item);
6624
6625                 btrfs_set_extent_refs(leaf, ei, 0);
6626                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6627
6628                 if (back->is_data) {
6629                         btrfs_set_extent_flags(leaf, ei,
6630                                                BTRFS_EXTENT_FLAG_DATA);
6631                 } else {
6632                         struct btrfs_disk_key copy_key;;
6633
6634                         tback = to_tree_backref(back);
6635                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6636                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6637                                              sizeof(*bi));
6638
6639                         btrfs_set_disk_key_objectid(&copy_key,
6640                                                     rec->info_objectid);
6641                         btrfs_set_disk_key_type(&copy_key, 0);
6642                         btrfs_set_disk_key_offset(&copy_key, 0);
6643
6644                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6645                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6646
6647                         btrfs_set_extent_flags(leaf, ei,
6648                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6649                 }
6650
6651                 btrfs_mark_buffer_dirty(leaf);
6652                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6653                                                rec->max_size, 1, 0);
6654                 if (ret)
6655                         goto fail;
6656                 btrfs_release_path(path);
6657         }
6658
6659         if (back->is_data) {
6660                 u64 parent;
6661                 int i;
6662
6663                 dback = to_data_backref(back);
6664                 if (back->full_backref)
6665                         parent = dback->parent;
6666                 else
6667                         parent = 0;
6668
6669                 for (i = 0; i < dback->found_ref; i++) {
6670                         /* if parent != 0, we're doing a full backref
6671                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6672                          * just makes the backref allocator create a data
6673                          * backref
6674                          */
6675                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6676                                                    rec->start, rec->max_size,
6677                                                    parent,
6678                                                    dback->root,
6679                                                    parent ?
6680                                                    BTRFS_FIRST_FREE_OBJECTID :
6681                                                    dback->owner,
6682                                                    dback->offset);
6683                         if (ret)
6684                                 break;
6685                 }
6686                 fprintf(stderr, "adding new data backref"
6687                                 " on %llu %s %llu owner %llu"
6688                                 " offset %llu found %d\n",
6689                                 (unsigned long long)rec->start,
6690                                 back->full_backref ?
6691                                 "parent" : "root",
6692                                 back->full_backref ?
6693                                 (unsigned long long)parent :
6694                                 (unsigned long long)dback->root,
6695                                 (unsigned long long)dback->owner,
6696                                 (unsigned long long)dback->offset,
6697                                 dback->found_ref);
6698         } else {
6699                 u64 parent;
6700
6701                 tback = to_tree_backref(back);
6702                 if (back->full_backref)
6703                         parent = tback->parent;
6704                 else
6705                         parent = 0;
6706
6707                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6708                                            rec->start, rec->max_size,
6709                                            parent, tback->root, 0, 0);
6710                 fprintf(stderr, "adding new tree backref on "
6711                         "start %llu len %llu parent %llu root %llu\n",
6712                         rec->start, rec->max_size, parent, tback->root);
6713         }
6714 fail:
6715         btrfs_release_path(path);
6716         return ret;
6717 }
6718
6719 static struct extent_entry *find_entry(struct list_head *entries,
6720                                        u64 bytenr, u64 bytes)
6721 {
6722         struct extent_entry *entry = NULL;
6723
6724         list_for_each_entry(entry, entries, list) {
6725                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6726                         return entry;
6727         }
6728
6729         return NULL;
6730 }
6731
6732 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6733 {
6734         struct extent_entry *entry, *best = NULL, *prev = NULL;
6735
6736         list_for_each_entry(entry, entries, list) {
6737                 if (!prev) {
6738                         prev = entry;
6739                         continue;
6740                 }
6741
6742                 /*
6743                  * If there are as many broken entries as entries then we know
6744                  * not to trust this particular entry.
6745                  */
6746                 if (entry->broken == entry->count)
6747                         continue;
6748
6749                 /*
6750                  * If our current entry == best then we can't be sure our best
6751                  * is really the best, so we need to keep searching.
6752                  */
6753                 if (best && best->count == entry->count) {
6754                         prev = entry;
6755                         best = NULL;
6756                         continue;
6757                 }
6758
6759                 /* Prev == entry, not good enough, have to keep searching */
6760                 if (!prev->broken && prev->count == entry->count)
6761                         continue;
6762
6763                 if (!best)
6764                         best = (prev->count > entry->count) ? prev : entry;
6765                 else if (best->count < entry->count)
6766                         best = entry;
6767                 prev = entry;
6768         }
6769
6770         return best;
6771 }
6772
6773 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6774                       struct data_backref *dback, struct extent_entry *entry)
6775 {
6776         struct btrfs_trans_handle *trans;
6777         struct btrfs_root *root;
6778         struct btrfs_file_extent_item *fi;
6779         struct extent_buffer *leaf;
6780         struct btrfs_key key;
6781         u64 bytenr, bytes;
6782         int ret, err;
6783
6784         key.objectid = dback->root;
6785         key.type = BTRFS_ROOT_ITEM_KEY;
6786         key.offset = (u64)-1;
6787         root = btrfs_read_fs_root(info, &key);
6788         if (IS_ERR(root)) {
6789                 fprintf(stderr, "Couldn't find root for our ref\n");
6790                 return -EINVAL;
6791         }
6792
6793         /*
6794          * The backref points to the original offset of the extent if it was
6795          * split, so we need to search down to the offset we have and then walk
6796          * forward until we find the backref we're looking for.
6797          */
6798         key.objectid = dback->owner;
6799         key.type = BTRFS_EXTENT_DATA_KEY;
6800         key.offset = dback->offset;
6801         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6802         if (ret < 0) {
6803                 fprintf(stderr, "Error looking up ref %d\n", ret);
6804                 return ret;
6805         }
6806
6807         while (1) {
6808                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6809                         ret = btrfs_next_leaf(root, path);
6810                         if (ret) {
6811                                 fprintf(stderr, "Couldn't find our ref, next\n");
6812                                 return -EINVAL;
6813                         }
6814                 }
6815                 leaf = path->nodes[0];
6816                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6817                 if (key.objectid != dback->owner ||
6818                     key.type != BTRFS_EXTENT_DATA_KEY) {
6819                         fprintf(stderr, "Couldn't find our ref, search\n");
6820                         return -EINVAL;
6821                 }
6822                 fi = btrfs_item_ptr(leaf, path->slots[0],
6823                                     struct btrfs_file_extent_item);
6824                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6825                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6826
6827                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6828                         break;
6829                 path->slots[0]++;
6830         }
6831
6832         btrfs_release_path(path);
6833
6834         trans = btrfs_start_transaction(root, 1);
6835         if (IS_ERR(trans))
6836                 return PTR_ERR(trans);
6837
6838         /*
6839          * Ok we have the key of the file extent we want to fix, now we can cow
6840          * down to the thing and fix it.
6841          */
6842         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6843         if (ret < 0) {
6844                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6845                         key.objectid, key.type, key.offset, ret);
6846                 goto out;
6847         }
6848         if (ret > 0) {
6849                 fprintf(stderr, "Well that's odd, we just found this key "
6850                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6851                         key.offset);
6852                 ret = -EINVAL;
6853                 goto out;
6854         }
6855         leaf = path->nodes[0];
6856         fi = btrfs_item_ptr(leaf, path->slots[0],
6857                             struct btrfs_file_extent_item);
6858
6859         if (btrfs_file_extent_compression(leaf, fi) &&
6860             dback->disk_bytenr != entry->bytenr) {
6861                 fprintf(stderr, "Ref doesn't match the record start and is "
6862                         "compressed, please take a btrfs-image of this file "
6863                         "system and send it to a btrfs developer so they can "
6864                         "complete this functionality for bytenr %Lu\n",
6865                         dback->disk_bytenr);
6866                 ret = -EINVAL;
6867                 goto out;
6868         }
6869
6870         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6871                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6872         } else if (dback->disk_bytenr > entry->bytenr) {
6873                 u64 off_diff, offset;
6874
6875                 off_diff = dback->disk_bytenr - entry->bytenr;
6876                 offset = btrfs_file_extent_offset(leaf, fi);
6877                 if (dback->disk_bytenr + offset +
6878                     btrfs_file_extent_num_bytes(leaf, fi) >
6879                     entry->bytenr + entry->bytes) {
6880                         fprintf(stderr, "Ref is past the entry end, please "
6881                                 "take a btrfs-image of this file system and "
6882                                 "send it to a btrfs developer, ref %Lu\n",
6883                                 dback->disk_bytenr);
6884                         ret = -EINVAL;
6885                         goto out;
6886                 }
6887                 offset += off_diff;
6888                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6889                 btrfs_set_file_extent_offset(leaf, fi, offset);
6890         } else if (dback->disk_bytenr < entry->bytenr) {
6891                 u64 offset;
6892
6893                 offset = btrfs_file_extent_offset(leaf, fi);
6894                 if (dback->disk_bytenr + offset < entry->bytenr) {
6895                         fprintf(stderr, "Ref is before the entry start, please"
6896                                 " take a btrfs-image of this file system and "
6897                                 "send it to a btrfs developer, ref %Lu\n",
6898                                 dback->disk_bytenr);
6899                         ret = -EINVAL;
6900                         goto out;
6901                 }
6902
6903                 offset += dback->disk_bytenr;
6904                 offset -= entry->bytenr;
6905                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6906                 btrfs_set_file_extent_offset(leaf, fi, offset);
6907         }
6908
6909         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6910
6911         /*
6912          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6913          * only do this if we aren't using compression, otherwise it's a
6914          * trickier case.
6915          */
6916         if (!btrfs_file_extent_compression(leaf, fi))
6917                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6918         else
6919                 printf("ram bytes may be wrong?\n");
6920         btrfs_mark_buffer_dirty(leaf);
6921 out:
6922         err = btrfs_commit_transaction(trans, root);
6923         btrfs_release_path(path);
6924         return ret ? ret : err;
6925 }
6926
6927 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6928                            struct extent_record *rec)
6929 {
6930         struct extent_backref *back;
6931         struct data_backref *dback;
6932         struct extent_entry *entry, *best = NULL;
6933         LIST_HEAD(entries);
6934         int nr_entries = 0;
6935         int broken_entries = 0;
6936         int ret = 0;
6937         short mismatch = 0;
6938
6939         /*
6940          * Metadata is easy and the backrefs should always agree on bytenr and
6941          * size, if not we've got bigger issues.
6942          */
6943         if (rec->metadata)
6944                 return 0;
6945
6946         list_for_each_entry(back, &rec->backrefs, list) {
6947                 if (back->full_backref || !back->is_data)
6948                         continue;
6949
6950                 dback = to_data_backref(back);
6951
6952                 /*
6953                  * We only pay attention to backrefs that we found a real
6954                  * backref for.
6955                  */
6956                 if (dback->found_ref == 0)
6957                         continue;
6958
6959                 /*
6960                  * For now we only catch when the bytes don't match, not the
6961                  * bytenr.  We can easily do this at the same time, but I want
6962                  * to have a fs image to test on before we just add repair
6963                  * functionality willy-nilly so we know we won't screw up the
6964                  * repair.
6965                  */
6966
6967                 entry = find_entry(&entries, dback->disk_bytenr,
6968                                    dback->bytes);
6969                 if (!entry) {
6970                         entry = malloc(sizeof(struct extent_entry));
6971                         if (!entry) {
6972                                 ret = -ENOMEM;
6973                                 goto out;
6974                         }
6975                         memset(entry, 0, sizeof(*entry));
6976                         entry->bytenr = dback->disk_bytenr;
6977                         entry->bytes = dback->bytes;
6978                         list_add_tail(&entry->list, &entries);
6979                         nr_entries++;
6980                 }
6981
6982                 /*
6983                  * If we only have on entry we may think the entries agree when
6984                  * in reality they don't so we have to do some extra checking.
6985                  */
6986                 if (dback->disk_bytenr != rec->start ||
6987                     dback->bytes != rec->nr || back->broken)
6988                         mismatch = 1;
6989
6990                 if (back->broken) {
6991                         entry->broken++;
6992                         broken_entries++;
6993                 }
6994
6995                 entry->count++;
6996         }
6997
6998         /* Yay all the backrefs agree, carry on good sir */
6999         if (nr_entries <= 1 && !mismatch)
7000                 goto out;
7001
7002         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7003                 "%Lu\n", rec->start);
7004
7005         /*
7006          * First we want to see if the backrefs can agree amongst themselves who
7007          * is right, so figure out which one of the entries has the highest
7008          * count.
7009          */
7010         best = find_most_right_entry(&entries);
7011
7012         /*
7013          * Ok so we may have an even split between what the backrefs think, so
7014          * this is where we use the extent ref to see what it thinks.
7015          */
7016         if (!best) {
7017                 entry = find_entry(&entries, rec->start, rec->nr);
7018                 if (!entry && (!broken_entries || !rec->found_rec)) {
7019                         fprintf(stderr, "Backrefs don't agree with each other "
7020                                 "and extent record doesn't agree with anybody,"
7021                                 " so we can't fix bytenr %Lu bytes %Lu\n",
7022                                 rec->start, rec->nr);
7023                         ret = -EINVAL;
7024                         goto out;
7025                 } else if (!entry) {
7026                         /*
7027                          * Ok our backrefs were broken, we'll assume this is the
7028                          * correct value and add an entry for this range.
7029                          */
7030                         entry = malloc(sizeof(struct extent_entry));
7031                         if (!entry) {
7032                                 ret = -ENOMEM;
7033                                 goto out;
7034                         }
7035                         memset(entry, 0, sizeof(*entry));
7036                         entry->bytenr = rec->start;
7037                         entry->bytes = rec->nr;
7038                         list_add_tail(&entry->list, &entries);
7039                         nr_entries++;
7040                 }
7041                 entry->count++;
7042                 best = find_most_right_entry(&entries);
7043                 if (!best) {
7044                         fprintf(stderr, "Backrefs and extent record evenly "
7045                                 "split on who is right, this is going to "
7046                                 "require user input to fix bytenr %Lu bytes "
7047                                 "%Lu\n", rec->start, rec->nr);
7048                         ret = -EINVAL;
7049                         goto out;
7050                 }
7051         }
7052
7053         /*
7054          * I don't think this can happen currently as we'll abort() if we catch
7055          * this case higher up, but in case somebody removes that we still can't
7056          * deal with it properly here yet, so just bail out of that's the case.
7057          */
7058         if (best->bytenr != rec->start) {
7059                 fprintf(stderr, "Extent start and backref starts don't match, "
7060                         "please use btrfs-image on this file system and send "
7061                         "it to a btrfs developer so they can make fsck fix "
7062                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
7063                         rec->start, rec->nr);
7064                 ret = -EINVAL;
7065                 goto out;
7066         }
7067
7068         /*
7069          * Ok great we all agreed on an extent record, let's go find the real
7070          * references and fix up the ones that don't match.
7071          */
7072         list_for_each_entry(back, &rec->backrefs, list) {
7073                 if (back->full_backref || !back->is_data)
7074                         continue;
7075
7076                 dback = to_data_backref(back);
7077
7078                 /*
7079                  * Still ignoring backrefs that don't have a real ref attached
7080                  * to them.
7081                  */
7082                 if (dback->found_ref == 0)
7083                         continue;
7084
7085                 if (dback->bytes == best->bytes &&
7086                     dback->disk_bytenr == best->bytenr)
7087                         continue;
7088
7089                 ret = repair_ref(info, path, dback, best);
7090                 if (ret)
7091                         goto out;
7092         }
7093
7094         /*
7095          * Ok we messed with the actual refs, which means we need to drop our
7096          * entire cache and go back and rescan.  I know this is a huge pain and
7097          * adds a lot of extra work, but it's the only way to be safe.  Once all
7098          * the backrefs agree we may not need to do anything to the extent
7099          * record itself.
7100          */
7101         ret = -EAGAIN;
7102 out:
7103         while (!list_empty(&entries)) {
7104                 entry = list_entry(entries.next, struct extent_entry, list);
7105                 list_del_init(&entry->list);
7106                 free(entry);
7107         }
7108         return ret;
7109 }
7110
7111 static int process_duplicates(struct btrfs_root *root,
7112                               struct cache_tree *extent_cache,
7113                               struct extent_record *rec)
7114 {
7115         struct extent_record *good, *tmp;
7116         struct cache_extent *cache;
7117         int ret;
7118
7119         /*
7120          * If we found a extent record for this extent then return, or if we
7121          * have more than one duplicate we are likely going to need to delete
7122          * something.
7123          */
7124         if (rec->found_rec || rec->num_duplicates > 1)
7125                 return 0;
7126
7127         /* Shouldn't happen but just in case */
7128         BUG_ON(!rec->num_duplicates);
7129
7130         /*
7131          * So this happens if we end up with a backref that doesn't match the
7132          * actual extent entry.  So either the backref is bad or the extent
7133          * entry is bad.  Either way we want to have the extent_record actually
7134          * reflect what we found in the extent_tree, so we need to take the
7135          * duplicate out and use that as the extent_record since the only way we
7136          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7137          */
7138         remove_cache_extent(extent_cache, &rec->cache);
7139
7140         good = to_extent_record(rec->dups.next);
7141         list_del_init(&good->list);
7142         INIT_LIST_HEAD(&good->backrefs);
7143         INIT_LIST_HEAD(&good->dups);
7144         good->cache.start = good->start;
7145         good->cache.size = good->nr;
7146         good->content_checked = 0;
7147         good->owner_ref_checked = 0;
7148         good->num_duplicates = 0;
7149         good->refs = rec->refs;
7150         list_splice_init(&rec->backrefs, &good->backrefs);
7151         while (1) {
7152                 cache = lookup_cache_extent(extent_cache, good->start,
7153                                             good->nr);
7154                 if (!cache)
7155                         break;
7156                 tmp = container_of(cache, struct extent_record, cache);
7157
7158                 /*
7159                  * If we find another overlapping extent and it's found_rec is
7160                  * set then it's a duplicate and we need to try and delete
7161                  * something.
7162                  */
7163                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7164                         if (list_empty(&good->list))
7165                                 list_add_tail(&good->list,
7166                                               &duplicate_extents);
7167                         good->num_duplicates += tmp->num_duplicates + 1;
7168                         list_splice_init(&tmp->dups, &good->dups);
7169                         list_del_init(&tmp->list);
7170                         list_add_tail(&tmp->list, &good->dups);
7171                         remove_cache_extent(extent_cache, &tmp->cache);
7172                         continue;
7173                 }
7174
7175                 /*
7176                  * Ok we have another non extent item backed extent rec, so lets
7177                  * just add it to this extent and carry on like we did above.
7178                  */
7179                 good->refs += tmp->refs;
7180                 list_splice_init(&tmp->backrefs, &good->backrefs);
7181                 remove_cache_extent(extent_cache, &tmp->cache);
7182                 free(tmp);
7183         }
7184         ret = insert_cache_extent(extent_cache, &good->cache);
7185         BUG_ON(ret);
7186         free(rec);
7187         return good->num_duplicates ? 0 : 1;
7188 }
7189
7190 static int delete_duplicate_records(struct btrfs_root *root,
7191                                     struct extent_record *rec)
7192 {
7193         struct btrfs_trans_handle *trans;
7194         LIST_HEAD(delete_list);
7195         struct btrfs_path *path;
7196         struct extent_record *tmp, *good, *n;
7197         int nr_del = 0;
7198         int ret = 0, err;
7199         struct btrfs_key key;
7200
7201         path = btrfs_alloc_path();
7202         if (!path) {
7203                 ret = -ENOMEM;
7204                 goto out;
7205         }
7206
7207         good = rec;
7208         /* Find the record that covers all of the duplicates. */
7209         list_for_each_entry(tmp, &rec->dups, list) {
7210                 if (good->start < tmp->start)
7211                         continue;
7212                 if (good->nr > tmp->nr)
7213                         continue;
7214
7215                 if (tmp->start + tmp->nr < good->start + good->nr) {
7216                         fprintf(stderr, "Ok we have overlapping extents that "
7217                                 "aren't completely covered by each other, this "
7218                                 "is going to require more careful thought.  "
7219                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7220                                 tmp->start, tmp->nr, good->start, good->nr);
7221                         abort();
7222                 }
7223                 good = tmp;
7224         }
7225
7226         if (good != rec)
7227                 list_add_tail(&rec->list, &delete_list);
7228
7229         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7230                 if (tmp == good)
7231                         continue;
7232                 list_move_tail(&tmp->list, &delete_list);
7233         }
7234
7235         root = root->fs_info->extent_root;
7236         trans = btrfs_start_transaction(root, 1);
7237         if (IS_ERR(trans)) {
7238                 ret = PTR_ERR(trans);
7239                 goto out;
7240         }
7241
7242         list_for_each_entry(tmp, &delete_list, list) {
7243                 if (tmp->found_rec == 0)
7244                         continue;
7245                 key.objectid = tmp->start;
7246                 key.type = BTRFS_EXTENT_ITEM_KEY;
7247                 key.offset = tmp->nr;
7248
7249                 /* Shouldn't happen but just in case */
7250                 if (tmp->metadata) {
7251                         fprintf(stderr, "Well this shouldn't happen, extent "
7252                                 "record overlaps but is metadata? "
7253                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7254                         abort();
7255                 }
7256
7257                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7258                 if (ret) {
7259                         if (ret > 0)
7260                                 ret = -EINVAL;
7261                         break;
7262                 }
7263                 ret = btrfs_del_item(trans, root, path);
7264                 if (ret)
7265                         break;
7266                 btrfs_release_path(path);
7267                 nr_del++;
7268         }
7269         err = btrfs_commit_transaction(trans, root);
7270         if (err && !ret)
7271                 ret = err;
7272 out:
7273         while (!list_empty(&delete_list)) {
7274                 tmp = to_extent_record(delete_list.next);
7275                 list_del_init(&tmp->list);
7276                 if (tmp == rec)
7277                         continue;
7278                 free(tmp);
7279         }
7280
7281         while (!list_empty(&rec->dups)) {
7282                 tmp = to_extent_record(rec->dups.next);
7283                 list_del_init(&tmp->list);
7284                 free(tmp);
7285         }
7286
7287         btrfs_free_path(path);
7288
7289         if (!ret && !nr_del)
7290                 rec->num_duplicates = 0;
7291
7292         return ret ? ret : nr_del;
7293 }
7294
7295 static int find_possible_backrefs(struct btrfs_fs_info *info,
7296                                   struct btrfs_path *path,
7297                                   struct cache_tree *extent_cache,
7298                                   struct extent_record *rec)
7299 {
7300         struct btrfs_root *root;
7301         struct extent_backref *back;
7302         struct data_backref *dback;
7303         struct cache_extent *cache;
7304         struct btrfs_file_extent_item *fi;
7305         struct btrfs_key key;
7306         u64 bytenr, bytes;
7307         int ret;
7308
7309         list_for_each_entry(back, &rec->backrefs, list) {
7310                 /* Don't care about full backrefs (poor unloved backrefs) */
7311                 if (back->full_backref || !back->is_data)
7312                         continue;
7313
7314                 dback = to_data_backref(back);
7315
7316                 /* We found this one, we don't need to do a lookup */
7317                 if (dback->found_ref)
7318                         continue;
7319
7320                 key.objectid = dback->root;
7321                 key.type = BTRFS_ROOT_ITEM_KEY;
7322                 key.offset = (u64)-1;
7323
7324                 root = btrfs_read_fs_root(info, &key);
7325
7326                 /* No root, definitely a bad ref, skip */
7327                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7328                         continue;
7329                 /* Other err, exit */
7330                 if (IS_ERR(root))
7331                         return PTR_ERR(root);
7332
7333                 key.objectid = dback->owner;
7334                 key.type = BTRFS_EXTENT_DATA_KEY;
7335                 key.offset = dback->offset;
7336                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7337                 if (ret) {
7338                         btrfs_release_path(path);
7339                         if (ret < 0)
7340                                 return ret;
7341                         /* Didn't find it, we can carry on */
7342                         ret = 0;
7343                         continue;
7344                 }
7345
7346                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7347                                     struct btrfs_file_extent_item);
7348                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7349                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7350                 btrfs_release_path(path);
7351                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7352                 if (cache) {
7353                         struct extent_record *tmp;
7354                         tmp = container_of(cache, struct extent_record, cache);
7355
7356                         /*
7357                          * If we found an extent record for the bytenr for this
7358                          * particular backref then we can't add it to our
7359                          * current extent record.  We only want to add backrefs
7360                          * that don't have a corresponding extent item in the
7361                          * extent tree since they likely belong to this record
7362                          * and we need to fix it if it doesn't match bytenrs.
7363                          */
7364                         if  (tmp->found_rec)
7365                                 continue;
7366                 }
7367
7368                 dback->found_ref += 1;
7369                 dback->disk_bytenr = bytenr;
7370                 dback->bytes = bytes;
7371
7372                 /*
7373                  * Set this so the verify backref code knows not to trust the
7374                  * values in this backref.
7375                  */
7376                 back->broken = 1;
7377         }
7378
7379         return 0;
7380 }
7381
7382 /*
7383  * Record orphan data ref into corresponding root.
7384  *
7385  * Return 0 if the extent item contains data ref and recorded.
7386  * Return 1 if the extent item contains no useful data ref
7387  *   On that case, it may contains only shared_dataref or metadata backref
7388  *   or the file extent exists(this should be handled by the extent bytenr
7389  *   recovery routine)
7390  * Return <0 if something goes wrong.
7391  */
7392 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7393                                       struct extent_record *rec)
7394 {
7395         struct btrfs_key key;
7396         struct btrfs_root *dest_root;
7397         struct extent_backref *back;
7398         struct data_backref *dback;
7399         struct orphan_data_extent *orphan;
7400         struct btrfs_path *path;
7401         int recorded_data_ref = 0;
7402         int ret = 0;
7403
7404         if (rec->metadata)
7405                 return 1;
7406         path = btrfs_alloc_path();
7407         if (!path)
7408                 return -ENOMEM;
7409         list_for_each_entry(back, &rec->backrefs, list) {
7410                 if (back->full_backref || !back->is_data ||
7411                     !back->found_extent_tree)
7412                         continue;
7413                 dback = to_data_backref(back);
7414                 if (dback->found_ref)
7415                         continue;
7416                 key.objectid = dback->root;
7417                 key.type = BTRFS_ROOT_ITEM_KEY;
7418                 key.offset = (u64)-1;
7419
7420                 dest_root = btrfs_read_fs_root(fs_info, &key);
7421
7422                 /* For non-exist root we just skip it */
7423                 if (IS_ERR(dest_root) || !dest_root)
7424                         continue;
7425
7426                 key.objectid = dback->owner;
7427                 key.type = BTRFS_EXTENT_DATA_KEY;
7428                 key.offset = dback->offset;
7429
7430                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7431                 /*
7432                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7433                  * we need to record it for inode/file extent rebuild.
7434                  * For ret > 0, we record it only for file extent rebuild.
7435                  * For ret == 0, the file extent exists but only bytenr
7436                  * mismatch, let the original bytenr fix routine to handle,
7437                  * don't record it.
7438                  */
7439                 if (ret == 0)
7440                         continue;
7441                 ret = 0;
7442                 orphan = malloc(sizeof(*orphan));
7443                 if (!orphan) {
7444                         ret = -ENOMEM;
7445                         goto out;
7446                 }
7447                 INIT_LIST_HEAD(&orphan->list);
7448                 orphan->root = dback->root;
7449                 orphan->objectid = dback->owner;
7450                 orphan->offset = dback->offset;
7451                 orphan->disk_bytenr = rec->cache.start;
7452                 orphan->disk_len = rec->cache.size;
7453                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7454                 recorded_data_ref = 1;
7455         }
7456 out:
7457         btrfs_free_path(path);
7458         if (!ret)
7459                 return !recorded_data_ref;
7460         else
7461                 return ret;
7462 }
7463
7464 /*
7465  * when an incorrect extent item is found, this will delete
7466  * all of the existing entries for it and recreate them
7467  * based on what the tree scan found.
7468  */
7469 static int fixup_extent_refs(struct btrfs_fs_info *info,
7470                              struct cache_tree *extent_cache,
7471                              struct extent_record *rec)
7472 {
7473         struct btrfs_trans_handle *trans = NULL;
7474         int ret;
7475         struct btrfs_path *path;
7476         struct list_head *cur = rec->backrefs.next;
7477         struct cache_extent *cache;
7478         struct extent_backref *back;
7479         int allocated = 0;
7480         u64 flags = 0;
7481
7482         if (rec->flag_block_full_backref)
7483                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7484
7485         path = btrfs_alloc_path();
7486         if (!path)
7487                 return -ENOMEM;
7488
7489         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7490                 /*
7491                  * Sometimes the backrefs themselves are so broken they don't
7492                  * get attached to any meaningful rec, so first go back and
7493                  * check any of our backrefs that we couldn't find and throw
7494                  * them into the list if we find the backref so that
7495                  * verify_backrefs can figure out what to do.
7496                  */
7497                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7498                 if (ret < 0)
7499                         goto out;
7500         }
7501
7502         /* step one, make sure all of the backrefs agree */
7503         ret = verify_backrefs(info, path, rec);
7504         if (ret < 0)
7505                 goto out;
7506
7507         trans = btrfs_start_transaction(info->extent_root, 1);
7508         if (IS_ERR(trans)) {
7509                 ret = PTR_ERR(trans);
7510                 goto out;
7511         }
7512
7513         /* step two, delete all the existing records */
7514         ret = delete_extent_records(trans, info->extent_root, path,
7515                                     rec->start, rec->max_size);
7516
7517         if (ret < 0)
7518                 goto out;
7519
7520         /* was this block corrupt?  If so, don't add references to it */
7521         cache = lookup_cache_extent(info->corrupt_blocks,
7522                                     rec->start, rec->max_size);
7523         if (cache) {
7524                 ret = 0;
7525                 goto out;
7526         }
7527
7528         /* step three, recreate all the refs we did find */
7529         while(cur != &rec->backrefs) {
7530                 back = to_extent_backref(cur);
7531                 cur = cur->next;
7532
7533                 /*
7534                  * if we didn't find any references, don't create a
7535                  * new extent record
7536                  */
7537                 if (!back->found_ref)
7538                         continue;
7539
7540                 rec->bad_full_backref = 0;
7541                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7542                 allocated = 1;
7543
7544                 if (ret)
7545                         goto out;
7546         }
7547 out:
7548         if (trans) {
7549                 int err = btrfs_commit_transaction(trans, info->extent_root);
7550                 if (!ret)
7551                         ret = err;
7552         }
7553
7554         btrfs_free_path(path);
7555         return ret;
7556 }
7557
7558 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7559                               struct extent_record *rec)
7560 {
7561         struct btrfs_trans_handle *trans;
7562         struct btrfs_root *root = fs_info->extent_root;
7563         struct btrfs_path *path;
7564         struct btrfs_extent_item *ei;
7565         struct btrfs_key key;
7566         u64 flags;
7567         int ret = 0;
7568
7569         key.objectid = rec->start;
7570         if (rec->metadata) {
7571                 key.type = BTRFS_METADATA_ITEM_KEY;
7572                 key.offset = rec->info_level;
7573         } else {
7574                 key.type = BTRFS_EXTENT_ITEM_KEY;
7575                 key.offset = rec->max_size;
7576         }
7577
7578         path = btrfs_alloc_path();
7579         if (!path)
7580                 return -ENOMEM;
7581
7582         trans = btrfs_start_transaction(root, 0);
7583         if (IS_ERR(trans)) {
7584                 btrfs_free_path(path);
7585                 return PTR_ERR(trans);
7586         }
7587
7588         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7589         if (ret < 0) {
7590                 btrfs_free_path(path);
7591                 btrfs_commit_transaction(trans, root);
7592                 return ret;
7593         } else if (ret) {
7594                 fprintf(stderr, "Didn't find extent for %llu\n",
7595                         (unsigned long long)rec->start);
7596                 btrfs_free_path(path);
7597                 btrfs_commit_transaction(trans, root);
7598                 return -ENOENT;
7599         }
7600
7601         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7602                             struct btrfs_extent_item);
7603         flags = btrfs_extent_flags(path->nodes[0], ei);
7604         if (rec->flag_block_full_backref) {
7605                 fprintf(stderr, "setting full backref on %llu\n",
7606                         (unsigned long long)key.objectid);
7607                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7608         } else {
7609                 fprintf(stderr, "clearing full backref on %llu\n",
7610                         (unsigned long long)key.objectid);
7611                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7612         }
7613         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7614         btrfs_mark_buffer_dirty(path->nodes[0]);
7615         btrfs_free_path(path);
7616         return btrfs_commit_transaction(trans, root);
7617 }
7618
7619 /* right now we only prune from the extent allocation tree */
7620 static int prune_one_block(struct btrfs_trans_handle *trans,
7621                            struct btrfs_fs_info *info,
7622                            struct btrfs_corrupt_block *corrupt)
7623 {
7624         int ret;
7625         struct btrfs_path path;
7626         struct extent_buffer *eb;
7627         u64 found;
7628         int slot;
7629         int nritems;
7630         int level = corrupt->level + 1;
7631
7632         btrfs_init_path(&path);
7633 again:
7634         /* we want to stop at the parent to our busted block */
7635         path.lowest_level = level;
7636
7637         ret = btrfs_search_slot(trans, info->extent_root,
7638                                 &corrupt->key, &path, -1, 1);
7639
7640         if (ret < 0)
7641                 goto out;
7642
7643         eb = path.nodes[level];
7644         if (!eb) {
7645                 ret = -ENOENT;
7646                 goto out;
7647         }
7648
7649         /*
7650          * hopefully the search gave us the block we want to prune,
7651          * lets try that first
7652          */
7653         slot = path.slots[level];
7654         found =  btrfs_node_blockptr(eb, slot);
7655         if (found == corrupt->cache.start)
7656                 goto del_ptr;
7657
7658         nritems = btrfs_header_nritems(eb);
7659
7660         /* the search failed, lets scan this node and hope we find it */
7661         for (slot = 0; slot < nritems; slot++) {
7662                 found =  btrfs_node_blockptr(eb, slot);
7663                 if (found == corrupt->cache.start)
7664                         goto del_ptr;
7665         }
7666         /*
7667          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7668          * to this block
7669          */
7670         if (eb == info->extent_root->node) {
7671                 ret = -ENOENT;
7672                 goto out;
7673         } else {
7674                 level++;
7675                 btrfs_release_path(&path);
7676                 goto again;
7677         }
7678
7679 del_ptr:
7680         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7681         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7682
7683 out:
7684         btrfs_release_path(&path);
7685         return ret;
7686 }
7687
7688 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7689 {
7690         struct btrfs_trans_handle *trans = NULL;
7691         struct cache_extent *cache;
7692         struct btrfs_corrupt_block *corrupt;
7693
7694         while (1) {
7695                 cache = search_cache_extent(info->corrupt_blocks, 0);
7696                 if (!cache)
7697                         break;
7698                 if (!trans) {
7699                         trans = btrfs_start_transaction(info->extent_root, 1);
7700                         if (IS_ERR(trans))
7701                                 return PTR_ERR(trans);
7702                 }
7703                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7704                 prune_one_block(trans, info, corrupt);
7705                 remove_cache_extent(info->corrupt_blocks, cache);
7706         }
7707         if (trans)
7708                 return btrfs_commit_transaction(trans, info->extent_root);
7709         return 0;
7710 }
7711
7712 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7713 {
7714         struct btrfs_block_group_cache *cache;
7715         u64 start, end;
7716         int ret;
7717
7718         while (1) {
7719                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7720                                             &start, &end, EXTENT_DIRTY);
7721                 if (ret)
7722                         break;
7723                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7724                                    GFP_NOFS);
7725         }
7726
7727         start = 0;
7728         while (1) {
7729                 cache = btrfs_lookup_first_block_group(fs_info, start);
7730                 if (!cache)
7731                         break;
7732                 if (cache->cached)
7733                         cache->cached = 0;
7734                 start = cache->key.objectid + cache->key.offset;
7735         }
7736 }
7737
7738 static int check_extent_refs(struct btrfs_root *root,
7739                              struct cache_tree *extent_cache)
7740 {
7741         struct extent_record *rec;
7742         struct cache_extent *cache;
7743         int err = 0;
7744         int ret = 0;
7745         int fixed = 0;
7746         int had_dups = 0;
7747         int recorded = 0;
7748
7749         if (repair) {
7750                 /*
7751                  * if we're doing a repair, we have to make sure
7752                  * we don't allocate from the problem extents.
7753                  * In the worst case, this will be all the
7754                  * extents in the FS
7755                  */
7756                 cache = search_cache_extent(extent_cache, 0);
7757                 while(cache) {
7758                         rec = container_of(cache, struct extent_record, cache);
7759                         set_extent_dirty(root->fs_info->excluded_extents,
7760                                          rec->start,
7761                                          rec->start + rec->max_size - 1,
7762                                          GFP_NOFS);
7763                         cache = next_cache_extent(cache);
7764                 }
7765
7766                 /* pin down all the corrupted blocks too */
7767                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7768                 while(cache) {
7769                         set_extent_dirty(root->fs_info->excluded_extents,
7770                                          cache->start,
7771                                          cache->start + cache->size - 1,
7772                                          GFP_NOFS);
7773                         cache = next_cache_extent(cache);
7774                 }
7775                 prune_corrupt_blocks(root->fs_info);
7776                 reset_cached_block_groups(root->fs_info);
7777         }
7778
7779         reset_cached_block_groups(root->fs_info);
7780
7781         /*
7782          * We need to delete any duplicate entries we find first otherwise we
7783          * could mess up the extent tree when we have backrefs that actually
7784          * belong to a different extent item and not the weird duplicate one.
7785          */
7786         while (repair && !list_empty(&duplicate_extents)) {
7787                 rec = to_extent_record(duplicate_extents.next);
7788                 list_del_init(&rec->list);
7789
7790                 /* Sometimes we can find a backref before we find an actual
7791                  * extent, so we need to process it a little bit to see if there
7792                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7793                  * if this is a backref screwup.  If we need to delete stuff
7794                  * process_duplicates() will return 0, otherwise it will return
7795                  * 1 and we
7796                  */
7797                 if (process_duplicates(root, extent_cache, rec))
7798                         continue;
7799                 ret = delete_duplicate_records(root, rec);
7800                 if (ret < 0)
7801                         return ret;
7802                 /*
7803                  * delete_duplicate_records will return the number of entries
7804                  * deleted, so if it's greater than 0 then we know we actually
7805                  * did something and we need to remove.
7806                  */
7807                 if (ret)
7808                         had_dups = 1;
7809         }
7810
7811         if (had_dups)
7812                 return -EAGAIN;
7813
7814         while(1) {
7815                 int cur_err = 0;
7816
7817                 fixed = 0;
7818                 recorded = 0;
7819                 cache = search_cache_extent(extent_cache, 0);
7820                 if (!cache)
7821                         break;
7822                 rec = container_of(cache, struct extent_record, cache);
7823                 if (rec->num_duplicates) {
7824                         fprintf(stderr, "extent item %llu has multiple extent "
7825                                 "items\n", (unsigned long long)rec->start);
7826                         err = 1;
7827                         cur_err = 1;
7828                 }
7829
7830                 if (rec->refs != rec->extent_item_refs) {
7831                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7832                                 (unsigned long long)rec->start,
7833                                 (unsigned long long)rec->nr);
7834                         fprintf(stderr, "extent item %llu, found %llu\n",
7835                                 (unsigned long long)rec->extent_item_refs,
7836                                 (unsigned long long)rec->refs);
7837                         ret = record_orphan_data_extents(root->fs_info, rec);
7838                         if (ret < 0)
7839                                 goto repair_abort;
7840                         if (ret == 0) {
7841                                 recorded = 1;
7842                         } else {
7843                                 /*
7844                                  * we can't use the extent to repair file
7845                                  * extent, let the fallback method handle it.
7846                                  */
7847                                 if (!fixed && repair) {
7848                                         ret = fixup_extent_refs(
7849                                                         root->fs_info,
7850                                                         extent_cache, rec);
7851                                         if (ret)
7852                                                 goto repair_abort;
7853                                         fixed = 1;
7854                                 }
7855                         }
7856                         err = 1;
7857                         cur_err = 1;
7858                 }
7859                 if (all_backpointers_checked(rec, 1)) {
7860                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7861                                 (unsigned long long)rec->start,
7862                                 (unsigned long long)rec->nr);
7863
7864                         if (!fixed && !recorded && repair) {
7865                                 ret = fixup_extent_refs(root->fs_info,
7866                                                         extent_cache, rec);
7867                                 if (ret)
7868                                         goto repair_abort;
7869                                 fixed = 1;
7870                         }
7871                         cur_err = 1;
7872                         err = 1;
7873                 }
7874                 if (!rec->owner_ref_checked) {
7875                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7876                                 (unsigned long long)rec->start,
7877                                 (unsigned long long)rec->nr);
7878                         if (!fixed && !recorded && repair) {
7879                                 ret = fixup_extent_refs(root->fs_info,
7880                                                         extent_cache, rec);
7881                                 if (ret)
7882                                         goto repair_abort;
7883                                 fixed = 1;
7884                         }
7885                         err = 1;
7886                         cur_err = 1;
7887                 }
7888                 if (rec->bad_full_backref) {
7889                         fprintf(stderr, "bad full backref, on [%llu]\n",
7890                                 (unsigned long long)rec->start);
7891                         if (repair) {
7892                                 ret = fixup_extent_flags(root->fs_info, rec);
7893                                 if (ret)
7894                                         goto repair_abort;
7895                                 fixed = 1;
7896                         }
7897                         err = 1;
7898                         cur_err = 1;
7899                 }
7900                 /*
7901                  * Although it's not a extent ref's problem, we reuse this
7902                  * routine for error reporting.
7903                  * No repair function yet.
7904                  */
7905                 if (rec->crossing_stripes) {
7906                         fprintf(stderr,
7907                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7908                                 rec->start, rec->start + rec->max_size);
7909                         err = 1;
7910                         cur_err = 1;
7911                 }
7912
7913                 if (rec->wrong_chunk_type) {
7914                         fprintf(stderr,
7915                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7916                                 rec->start, rec->start + rec->max_size);
7917                         err = 1;
7918                         cur_err = 1;
7919                 }
7920
7921                 remove_cache_extent(extent_cache, cache);
7922                 free_all_extent_backrefs(rec);
7923                 if (!init_extent_tree && repair && (!cur_err || fixed))
7924                         clear_extent_dirty(root->fs_info->excluded_extents,
7925                                            rec->start,
7926                                            rec->start + rec->max_size - 1,
7927                                            GFP_NOFS);
7928                 free(rec);
7929         }
7930 repair_abort:
7931         if (repair) {
7932                 if (ret && ret != -EAGAIN) {
7933                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7934                         exit(1);
7935                 } else if (!ret) {
7936                         struct btrfs_trans_handle *trans;
7937
7938                         root = root->fs_info->extent_root;
7939                         trans = btrfs_start_transaction(root, 1);
7940                         if (IS_ERR(trans)) {
7941                                 ret = PTR_ERR(trans);
7942                                 goto repair_abort;
7943                         }
7944
7945                         btrfs_fix_block_accounting(trans, root);
7946                         ret = btrfs_commit_transaction(trans, root);
7947                         if (ret)
7948                                 goto repair_abort;
7949                 }
7950                 if (err)
7951                         fprintf(stderr, "repaired damaged extent references\n");
7952                 return ret;
7953         }
7954         return err;
7955 }
7956
7957 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7958 {
7959         u64 stripe_size;
7960
7961         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7962                 stripe_size = length;
7963                 stripe_size /= num_stripes;
7964         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7965                 stripe_size = length * 2;
7966                 stripe_size /= num_stripes;
7967         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7968                 stripe_size = length;
7969                 stripe_size /= (num_stripes - 1);
7970         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7971                 stripe_size = length;
7972                 stripe_size /= (num_stripes - 2);
7973         } else {
7974                 stripe_size = length;
7975         }
7976         return stripe_size;
7977 }
7978
7979 /*
7980  * Check the chunk with its block group/dev list ref:
7981  * Return 0 if all refs seems valid.
7982  * Return 1 if part of refs seems valid, need later check for rebuild ref
7983  * like missing block group and needs to search extent tree to rebuild them.
7984  * Return -1 if essential refs are missing and unable to rebuild.
7985  */
7986 static int check_chunk_refs(struct chunk_record *chunk_rec,
7987                             struct block_group_tree *block_group_cache,
7988                             struct device_extent_tree *dev_extent_cache,
7989                             int silent)
7990 {
7991         struct cache_extent *block_group_item;
7992         struct block_group_record *block_group_rec;
7993         struct cache_extent *dev_extent_item;
7994         struct device_extent_record *dev_extent_rec;
7995         u64 devid;
7996         u64 offset;
7997         u64 length;
7998         int metadump_v2 = 0;
7999         int i;
8000         int ret = 0;
8001
8002         block_group_item = lookup_cache_extent(&block_group_cache->tree,
8003                                                chunk_rec->offset,
8004                                                chunk_rec->length);
8005         if (block_group_item) {
8006                 block_group_rec = container_of(block_group_item,
8007                                                struct block_group_record,
8008                                                cache);
8009                 if (chunk_rec->length != block_group_rec->offset ||
8010                     chunk_rec->offset != block_group_rec->objectid ||
8011                     (!metadump_v2 &&
8012                      chunk_rec->type_flags != block_group_rec->flags)) {
8013                         if (!silent)
8014                                 fprintf(stderr,
8015                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8016                                         chunk_rec->objectid,
8017                                         chunk_rec->type,
8018                                         chunk_rec->offset,
8019                                         chunk_rec->length,
8020                                         chunk_rec->offset,
8021                                         chunk_rec->type_flags,
8022                                         block_group_rec->objectid,
8023                                         block_group_rec->type,
8024                                         block_group_rec->offset,
8025                                         block_group_rec->offset,
8026                                         block_group_rec->objectid,
8027                                         block_group_rec->flags);
8028                         ret = -1;
8029                 } else {
8030                         list_del_init(&block_group_rec->list);
8031                         chunk_rec->bg_rec = block_group_rec;
8032                 }
8033         } else {
8034                 if (!silent)
8035                         fprintf(stderr,
8036                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8037                                 chunk_rec->objectid,
8038                                 chunk_rec->type,
8039                                 chunk_rec->offset,
8040                                 chunk_rec->length,
8041                                 chunk_rec->offset,
8042                                 chunk_rec->type_flags);
8043                 ret = 1;
8044         }
8045
8046         if (metadump_v2)
8047                 return ret;
8048
8049         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8050                                     chunk_rec->num_stripes);
8051         for (i = 0; i < chunk_rec->num_stripes; ++i) {
8052                 devid = chunk_rec->stripes[i].devid;
8053                 offset = chunk_rec->stripes[i].offset;
8054                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8055                                                        devid, offset, length);
8056                 if (dev_extent_item) {
8057                         dev_extent_rec = container_of(dev_extent_item,
8058                                                 struct device_extent_record,
8059                                                 cache);
8060                         if (dev_extent_rec->objectid != devid ||
8061                             dev_extent_rec->offset != offset ||
8062                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
8063                             dev_extent_rec->length != length) {
8064                                 if (!silent)
8065                                         fprintf(stderr,
8066                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8067                                                 chunk_rec->objectid,
8068                                                 chunk_rec->type,
8069                                                 chunk_rec->offset,
8070                                                 chunk_rec->stripes[i].devid,
8071                                                 chunk_rec->stripes[i].offset,
8072                                                 dev_extent_rec->objectid,
8073                                                 dev_extent_rec->offset,
8074                                                 dev_extent_rec->length);
8075                                 ret = -1;
8076                         } else {
8077                                 list_move(&dev_extent_rec->chunk_list,
8078                                           &chunk_rec->dextents);
8079                         }
8080                 } else {
8081                         if (!silent)
8082                                 fprintf(stderr,
8083                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8084                                         chunk_rec->objectid,
8085                                         chunk_rec->type,
8086                                         chunk_rec->offset,
8087                                         chunk_rec->stripes[i].devid,
8088                                         chunk_rec->stripes[i].offset);
8089                         ret = -1;
8090                 }
8091         }
8092         return ret;
8093 }
8094
8095 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8096 int check_chunks(struct cache_tree *chunk_cache,
8097                  struct block_group_tree *block_group_cache,
8098                  struct device_extent_tree *dev_extent_cache,
8099                  struct list_head *good, struct list_head *bad,
8100                  struct list_head *rebuild, int silent)
8101 {
8102         struct cache_extent *chunk_item;
8103         struct chunk_record *chunk_rec;
8104         struct block_group_record *bg_rec;
8105         struct device_extent_record *dext_rec;
8106         int err;
8107         int ret = 0;
8108
8109         chunk_item = first_cache_extent(chunk_cache);
8110         while (chunk_item) {
8111                 chunk_rec = container_of(chunk_item, struct chunk_record,
8112                                          cache);
8113                 err = check_chunk_refs(chunk_rec, block_group_cache,
8114                                        dev_extent_cache, silent);
8115                 if (err < 0)
8116                         ret = err;
8117                 if (err == 0 && good)
8118                         list_add_tail(&chunk_rec->list, good);
8119                 if (err > 0 && rebuild)
8120                         list_add_tail(&chunk_rec->list, rebuild);
8121                 if (err < 0 && bad)
8122                         list_add_tail(&chunk_rec->list, bad);
8123                 chunk_item = next_cache_extent(chunk_item);
8124         }
8125
8126         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8127                 if (!silent)
8128                         fprintf(stderr,
8129                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8130                                 bg_rec->objectid,
8131                                 bg_rec->offset,
8132                                 bg_rec->flags);
8133                 if (!ret)
8134                         ret = 1;
8135         }
8136
8137         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8138                             chunk_list) {
8139                 if (!silent)
8140                         fprintf(stderr,
8141                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8142                                 dext_rec->objectid,
8143                                 dext_rec->offset,
8144                                 dext_rec->length);
8145                 if (!ret)
8146                         ret = 1;
8147         }
8148         return ret;
8149 }
8150
8151
8152 static int check_device_used(struct device_record *dev_rec,
8153                              struct device_extent_tree *dext_cache)
8154 {
8155         struct cache_extent *cache;
8156         struct device_extent_record *dev_extent_rec;
8157         u64 total_byte = 0;
8158
8159         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8160         while (cache) {
8161                 dev_extent_rec = container_of(cache,
8162                                               struct device_extent_record,
8163                                               cache);
8164                 if (dev_extent_rec->objectid != dev_rec->devid)
8165                         break;
8166
8167                 list_del_init(&dev_extent_rec->device_list);
8168                 total_byte += dev_extent_rec->length;
8169                 cache = next_cache_extent(cache);
8170         }
8171
8172         if (total_byte != dev_rec->byte_used) {
8173                 fprintf(stderr,
8174                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8175                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8176                         dev_rec->type, dev_rec->offset);
8177                 return -1;
8178         } else {
8179                 return 0;
8180         }
8181 }
8182
8183 /* check btrfs_dev_item -> btrfs_dev_extent */
8184 static int check_devices(struct rb_root *dev_cache,
8185                          struct device_extent_tree *dev_extent_cache)
8186 {
8187         struct rb_node *dev_node;
8188         struct device_record *dev_rec;
8189         struct device_extent_record *dext_rec;
8190         int err;
8191         int ret = 0;
8192
8193         dev_node = rb_first(dev_cache);
8194         while (dev_node) {
8195                 dev_rec = container_of(dev_node, struct device_record, node);
8196                 err = check_device_used(dev_rec, dev_extent_cache);
8197                 if (err)
8198                         ret = err;
8199
8200                 dev_node = rb_next(dev_node);
8201         }
8202         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8203                             device_list) {
8204                 fprintf(stderr,
8205                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8206                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8207                 if (!ret)
8208                         ret = 1;
8209         }
8210         return ret;
8211 }
8212
8213 static int add_root_item_to_list(struct list_head *head,
8214                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8215                                   u8 level, u8 drop_level,
8216                                   int level_size, struct btrfs_key *drop_key)
8217 {
8218
8219         struct root_item_record *ri_rec;
8220         ri_rec = malloc(sizeof(*ri_rec));
8221         if (!ri_rec)
8222                 return -ENOMEM;
8223         ri_rec->bytenr = bytenr;
8224         ri_rec->objectid = objectid;
8225         ri_rec->level = level;
8226         ri_rec->level_size = level_size;
8227         ri_rec->drop_level = drop_level;
8228         ri_rec->last_snapshot = last_snapshot;
8229         if (drop_key)
8230                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8231         list_add_tail(&ri_rec->list, head);
8232
8233         return 0;
8234 }
8235
8236 static void free_root_item_list(struct list_head *list)
8237 {
8238         struct root_item_record *ri_rec;
8239
8240         while (!list_empty(list)) {
8241                 ri_rec = list_first_entry(list, struct root_item_record,
8242                                           list);
8243                 list_del_init(&ri_rec->list);
8244                 free(ri_rec);
8245         }
8246 }
8247
8248 static int deal_root_from_list(struct list_head *list,
8249                                struct btrfs_root *root,
8250                                struct block_info *bits,
8251                                int bits_nr,
8252                                struct cache_tree *pending,
8253                                struct cache_tree *seen,
8254                                struct cache_tree *reada,
8255                                struct cache_tree *nodes,
8256                                struct cache_tree *extent_cache,
8257                                struct cache_tree *chunk_cache,
8258                                struct rb_root *dev_cache,
8259                                struct block_group_tree *block_group_cache,
8260                                struct device_extent_tree *dev_extent_cache)
8261 {
8262         int ret = 0;
8263         u64 last;
8264
8265         while (!list_empty(list)) {
8266                 struct root_item_record *rec;
8267                 struct extent_buffer *buf;
8268                 rec = list_entry(list->next,
8269                                  struct root_item_record, list);
8270                 last = 0;
8271                 buf = read_tree_block(root->fs_info->tree_root,
8272                                       rec->bytenr, rec->level_size, 0);
8273                 if (!extent_buffer_uptodate(buf)) {
8274                         free_extent_buffer(buf);
8275                         ret = -EIO;
8276                         break;
8277                 }
8278                 add_root_to_pending(buf, extent_cache, pending,
8279                                     seen, nodes, rec->objectid);
8280                 /*
8281                  * To rebuild extent tree, we need deal with snapshot
8282                  * one by one, otherwise we deal with node firstly which
8283                  * can maximize readahead.
8284                  */
8285                 while (1) {
8286                         ret = run_next_block(root, bits, bits_nr, &last,
8287                                              pending, seen, reada, nodes,
8288                                              extent_cache, chunk_cache,
8289                                              dev_cache, block_group_cache,
8290                                              dev_extent_cache, rec);
8291                         if (ret != 0)
8292                                 break;
8293                 }
8294                 free_extent_buffer(buf);
8295                 list_del(&rec->list);
8296                 free(rec);
8297                 if (ret < 0)
8298                         break;
8299         }
8300         while (ret >= 0) {
8301                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8302                                      reada, nodes, extent_cache, chunk_cache,
8303                                      dev_cache, block_group_cache,
8304                                      dev_extent_cache, NULL);
8305                 if (ret != 0) {
8306                         if (ret > 0)
8307                                 ret = 0;
8308                         break;
8309                 }
8310         }
8311         return ret;
8312 }
8313
8314 static int check_chunks_and_extents(struct btrfs_root *root)
8315 {
8316         struct rb_root dev_cache;
8317         struct cache_tree chunk_cache;
8318         struct block_group_tree block_group_cache;
8319         struct device_extent_tree dev_extent_cache;
8320         struct cache_tree extent_cache;
8321         struct cache_tree seen;
8322         struct cache_tree pending;
8323         struct cache_tree reada;
8324         struct cache_tree nodes;
8325         struct extent_io_tree excluded_extents;
8326         struct cache_tree corrupt_blocks;
8327         struct btrfs_path path;
8328         struct btrfs_key key;
8329         struct btrfs_key found_key;
8330         int ret, err = 0;
8331         struct block_info *bits;
8332         int bits_nr;
8333         struct extent_buffer *leaf;
8334         int slot;
8335         struct btrfs_root_item ri;
8336         struct list_head dropping_trees;
8337         struct list_head normal_trees;
8338         struct btrfs_root *root1;
8339         u64 objectid;
8340         u32 level_size;
8341         u8 level;
8342
8343         dev_cache = RB_ROOT;
8344         cache_tree_init(&chunk_cache);
8345         block_group_tree_init(&block_group_cache);
8346         device_extent_tree_init(&dev_extent_cache);
8347
8348         cache_tree_init(&extent_cache);
8349         cache_tree_init(&seen);
8350         cache_tree_init(&pending);
8351         cache_tree_init(&nodes);
8352         cache_tree_init(&reada);
8353         cache_tree_init(&corrupt_blocks);
8354         extent_io_tree_init(&excluded_extents);
8355         INIT_LIST_HEAD(&dropping_trees);
8356         INIT_LIST_HEAD(&normal_trees);
8357
8358         if (repair) {
8359                 root->fs_info->excluded_extents = &excluded_extents;
8360                 root->fs_info->fsck_extent_cache = &extent_cache;
8361                 root->fs_info->free_extent_hook = free_extent_hook;
8362                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8363         }
8364
8365         bits_nr = 1024;
8366         bits = malloc(bits_nr * sizeof(struct block_info));
8367         if (!bits) {
8368                 perror("malloc");
8369                 exit(1);
8370         }
8371
8372         if (ctx.progress_enabled) {
8373                 ctx.tp = TASK_EXTENTS;
8374                 task_start(ctx.info);
8375         }
8376
8377 again:
8378         root1 = root->fs_info->tree_root;
8379         level = btrfs_header_level(root1->node);
8380         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8381                                     root1->node->start, 0, level, 0,
8382                                     root1->nodesize, NULL);
8383         if (ret < 0)
8384                 goto out;
8385         root1 = root->fs_info->chunk_root;
8386         level = btrfs_header_level(root1->node);
8387         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8388                                     root1->node->start, 0, level, 0,
8389                                     root1->nodesize, NULL);
8390         if (ret < 0)
8391                 goto out;
8392         btrfs_init_path(&path);
8393         key.offset = 0;
8394         key.objectid = 0;
8395         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8396         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8397                                         &key, &path, 0, 0);
8398         if (ret < 0)
8399                 goto out;
8400         while(1) {
8401                 leaf = path.nodes[0];
8402                 slot = path.slots[0];
8403                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8404                         ret = btrfs_next_leaf(root, &path);
8405                         if (ret != 0)
8406                                 break;
8407                         leaf = path.nodes[0];
8408                         slot = path.slots[0];
8409                 }
8410                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8411                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8412                         unsigned long offset;
8413                         u64 last_snapshot;
8414
8415                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8416                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8417                         last_snapshot = btrfs_root_last_snapshot(&ri);
8418                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8419                                 level = btrfs_root_level(&ri);
8420                                 level_size = root->nodesize;
8421                                 ret = add_root_item_to_list(&normal_trees,
8422                                                 found_key.objectid,
8423                                                 btrfs_root_bytenr(&ri),
8424                                                 last_snapshot, level,
8425                                                 0, level_size, NULL);
8426                                 if (ret < 0)
8427                                         goto out;
8428                         } else {
8429                                 level = btrfs_root_level(&ri);
8430                                 level_size = root->nodesize;
8431                                 objectid = found_key.objectid;
8432                                 btrfs_disk_key_to_cpu(&found_key,
8433                                                       &ri.drop_progress);
8434                                 ret = add_root_item_to_list(&dropping_trees,
8435                                                 objectid,
8436                                                 btrfs_root_bytenr(&ri),
8437                                                 last_snapshot, level,
8438                                                 ri.drop_level,
8439                                                 level_size, &found_key);
8440                                 if (ret < 0)
8441                                         goto out;
8442                         }
8443                 }
8444                 path.slots[0]++;
8445         }
8446         btrfs_release_path(&path);
8447
8448         /*
8449          * check_block can return -EAGAIN if it fixes something, please keep
8450          * this in mind when dealing with return values from these functions, if
8451          * we get -EAGAIN we want to fall through and restart the loop.
8452          */
8453         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8454                                   &seen, &reada, &nodes, &extent_cache,
8455                                   &chunk_cache, &dev_cache, &block_group_cache,
8456                                   &dev_extent_cache);
8457         if (ret < 0) {
8458                 if (ret == -EAGAIN)
8459                         goto loop;
8460                 goto out;
8461         }
8462         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8463                                   &pending, &seen, &reada, &nodes,
8464                                   &extent_cache, &chunk_cache, &dev_cache,
8465                                   &block_group_cache, &dev_extent_cache);
8466         if (ret < 0) {
8467                 if (ret == -EAGAIN)
8468                         goto loop;
8469                 goto out;
8470         }
8471
8472         ret = check_chunks(&chunk_cache, &block_group_cache,
8473                            &dev_extent_cache, NULL, NULL, NULL, 0);
8474         if (ret) {
8475                 if (ret == -EAGAIN)
8476                         goto loop;
8477                 err = ret;
8478         }
8479
8480         ret = check_extent_refs(root, &extent_cache);
8481         if (ret < 0) {
8482                 if (ret == -EAGAIN)
8483                         goto loop;
8484                 goto out;
8485         }
8486
8487         ret = check_devices(&dev_cache, &dev_extent_cache);
8488         if (ret && err)
8489                 ret = err;
8490
8491 out:
8492         task_stop(ctx.info);
8493         if (repair) {
8494                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8495                 extent_io_tree_cleanup(&excluded_extents);
8496                 root->fs_info->fsck_extent_cache = NULL;
8497                 root->fs_info->free_extent_hook = NULL;
8498                 root->fs_info->corrupt_blocks = NULL;
8499                 root->fs_info->excluded_extents = NULL;
8500         }
8501         free(bits);
8502         free_chunk_cache_tree(&chunk_cache);
8503         free_device_cache_tree(&dev_cache);
8504         free_block_group_tree(&block_group_cache);
8505         free_device_extent_tree(&dev_extent_cache);
8506         free_extent_cache_tree(&seen);
8507         free_extent_cache_tree(&pending);
8508         free_extent_cache_tree(&reada);
8509         free_extent_cache_tree(&nodes);
8510         return ret;
8511 loop:
8512         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8513         free_extent_cache_tree(&seen);
8514         free_extent_cache_tree(&pending);
8515         free_extent_cache_tree(&reada);
8516         free_extent_cache_tree(&nodes);
8517         free_chunk_cache_tree(&chunk_cache);
8518         free_block_group_tree(&block_group_cache);
8519         free_device_cache_tree(&dev_cache);
8520         free_device_extent_tree(&dev_extent_cache);
8521         free_extent_record_cache(root->fs_info, &extent_cache);
8522         free_root_item_list(&normal_trees);
8523         free_root_item_list(&dropping_trees);
8524         extent_io_tree_cleanup(&excluded_extents);
8525         goto again;
8526 }
8527
8528 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8529                            struct btrfs_root *root, int overwrite)
8530 {
8531         struct extent_buffer *c;
8532         struct extent_buffer *old = root->node;
8533         int level;
8534         int ret;
8535         struct btrfs_disk_key disk_key = {0,0,0};
8536
8537         level = 0;
8538
8539         if (overwrite) {
8540                 c = old;
8541                 extent_buffer_get(c);
8542                 goto init;
8543         }
8544         c = btrfs_alloc_free_block(trans, root,
8545                                    root->nodesize,
8546                                    root->root_key.objectid,
8547                                    &disk_key, level, 0, 0);
8548         if (IS_ERR(c)) {
8549                 c = old;
8550                 extent_buffer_get(c);
8551                 overwrite = 1;
8552         }
8553 init:
8554         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8555         btrfs_set_header_level(c, level);
8556         btrfs_set_header_bytenr(c, c->start);
8557         btrfs_set_header_generation(c, trans->transid);
8558         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8559         btrfs_set_header_owner(c, root->root_key.objectid);
8560
8561         write_extent_buffer(c, root->fs_info->fsid,
8562                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8563
8564         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8565                             btrfs_header_chunk_tree_uuid(c),
8566                             BTRFS_UUID_SIZE);
8567
8568         btrfs_mark_buffer_dirty(c);
8569         /*
8570          * this case can happen in the following case:
8571          *
8572          * 1.overwrite previous root.
8573          *
8574          * 2.reinit reloc data root, this is because we skip pin
8575          * down reloc data tree before which means we can allocate
8576          * same block bytenr here.
8577          */
8578         if (old->start == c->start) {
8579                 btrfs_set_root_generation(&root->root_item,
8580                                           trans->transid);
8581                 root->root_item.level = btrfs_header_level(root->node);
8582                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8583                                         &root->root_key, &root->root_item);
8584                 if (ret) {
8585                         free_extent_buffer(c);
8586                         return ret;
8587                 }
8588         }
8589         free_extent_buffer(old);
8590         root->node = c;
8591         add_root_to_dirty_list(root);
8592         return 0;
8593 }
8594
8595 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8596                                 struct extent_buffer *eb, int tree_root)
8597 {
8598         struct extent_buffer *tmp;
8599         struct btrfs_root_item *ri;
8600         struct btrfs_key key;
8601         u64 bytenr;
8602         u32 nodesize;
8603         int level = btrfs_header_level(eb);
8604         int nritems;
8605         int ret;
8606         int i;
8607
8608         /*
8609          * If we have pinned this block before, don't pin it again.
8610          * This can not only avoid forever loop with broken filesystem
8611          * but also give us some speedups.
8612          */
8613         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8614                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8615                 return 0;
8616
8617         btrfs_pin_extent(fs_info, eb->start, eb->len);
8618
8619         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8620         nritems = btrfs_header_nritems(eb);
8621         for (i = 0; i < nritems; i++) {
8622                 if (level == 0) {
8623                         btrfs_item_key_to_cpu(eb, &key, i);
8624                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8625                                 continue;
8626                         /* Skip the extent root and reloc roots */
8627                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8628                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8629                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8630                                 continue;
8631                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8632                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8633
8634                         /*
8635                          * If at any point we start needing the real root we
8636                          * will have to build a stump root for the root we are
8637                          * in, but for now this doesn't actually use the root so
8638                          * just pass in extent_root.
8639                          */
8640                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8641                                               nodesize, 0);
8642                         if (!extent_buffer_uptodate(tmp)) {
8643                                 fprintf(stderr, "Error reading root block\n");
8644                                 return -EIO;
8645                         }
8646                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8647                         free_extent_buffer(tmp);
8648                         if (ret)
8649                                 return ret;
8650                 } else {
8651                         bytenr = btrfs_node_blockptr(eb, i);
8652
8653                         /* If we aren't the tree root don't read the block */
8654                         if (level == 1 && !tree_root) {
8655                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8656                                 continue;
8657                         }
8658
8659                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8660                                               nodesize, 0);
8661                         if (!extent_buffer_uptodate(tmp)) {
8662                                 fprintf(stderr, "Error reading tree block\n");
8663                                 return -EIO;
8664                         }
8665                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8666                         free_extent_buffer(tmp);
8667                         if (ret)
8668                                 return ret;
8669                 }
8670         }
8671
8672         return 0;
8673 }
8674
8675 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8676 {
8677         int ret;
8678
8679         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8680         if (ret)
8681                 return ret;
8682
8683         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8684 }
8685
8686 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8687 {
8688         struct btrfs_block_group_cache *cache;
8689         struct btrfs_path *path;
8690         struct extent_buffer *leaf;
8691         struct btrfs_chunk *chunk;
8692         struct btrfs_key key;
8693         int ret;
8694         u64 start;
8695
8696         path = btrfs_alloc_path();
8697         if (!path)
8698                 return -ENOMEM;
8699
8700         key.objectid = 0;
8701         key.type = BTRFS_CHUNK_ITEM_KEY;
8702         key.offset = 0;
8703
8704         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8705         if (ret < 0) {
8706                 btrfs_free_path(path);
8707                 return ret;
8708         }
8709
8710         /*
8711          * We do this in case the block groups were screwed up and had alloc
8712          * bits that aren't actually set on the chunks.  This happens with
8713          * restored images every time and could happen in real life I guess.
8714          */
8715         fs_info->avail_data_alloc_bits = 0;
8716         fs_info->avail_metadata_alloc_bits = 0;
8717         fs_info->avail_system_alloc_bits = 0;
8718
8719         /* First we need to create the in-memory block groups */
8720         while (1) {
8721                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8722                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8723                         if (ret < 0) {
8724                                 btrfs_free_path(path);
8725                                 return ret;
8726                         }
8727                         if (ret) {
8728                                 ret = 0;
8729                                 break;
8730                         }
8731                 }
8732                 leaf = path->nodes[0];
8733                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8734                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8735                         path->slots[0]++;
8736                         continue;
8737                 }
8738
8739                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8740                                        struct btrfs_chunk);
8741                 btrfs_add_block_group(fs_info, 0,
8742                                       btrfs_chunk_type(leaf, chunk),
8743                                       key.objectid, key.offset,
8744                                       btrfs_chunk_length(leaf, chunk));
8745                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8746                                  key.offset + btrfs_chunk_length(leaf, chunk),
8747                                  GFP_NOFS);
8748                 path->slots[0]++;
8749         }
8750         start = 0;
8751         while (1) {
8752                 cache = btrfs_lookup_first_block_group(fs_info, start);
8753                 if (!cache)
8754                         break;
8755                 cache->cached = 1;
8756                 start = cache->key.objectid + cache->key.offset;
8757         }
8758
8759         btrfs_free_path(path);
8760         return 0;
8761 }
8762
8763 static int reset_balance(struct btrfs_trans_handle *trans,
8764                          struct btrfs_fs_info *fs_info)
8765 {
8766         struct btrfs_root *root = fs_info->tree_root;
8767         struct btrfs_path *path;
8768         struct extent_buffer *leaf;
8769         struct btrfs_key key;
8770         int del_slot, del_nr = 0;
8771         int ret;
8772         int found = 0;
8773
8774         path = btrfs_alloc_path();
8775         if (!path)
8776                 return -ENOMEM;
8777
8778         key.objectid = BTRFS_BALANCE_OBJECTID;
8779         key.type = BTRFS_BALANCE_ITEM_KEY;
8780         key.offset = 0;
8781
8782         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8783         if (ret) {
8784                 if (ret > 0)
8785                         ret = 0;
8786                 if (!ret)
8787                         goto reinit_data_reloc;
8788                 else
8789                         goto out;
8790         }
8791
8792         ret = btrfs_del_item(trans, root, path);
8793         if (ret)
8794                 goto out;
8795         btrfs_release_path(path);
8796
8797         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8798         key.type = BTRFS_ROOT_ITEM_KEY;
8799         key.offset = 0;
8800
8801         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8802         if (ret < 0)
8803                 goto out;
8804         while (1) {
8805                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8806                         if (!found)
8807                                 break;
8808
8809                         if (del_nr) {
8810                                 ret = btrfs_del_items(trans, root, path,
8811                                                       del_slot, del_nr);
8812                                 del_nr = 0;
8813                                 if (ret)
8814                                         goto out;
8815                         }
8816                         key.offset++;
8817                         btrfs_release_path(path);
8818
8819                         found = 0;
8820                         ret = btrfs_search_slot(trans, root, &key, path,
8821                                                 -1, 1);
8822                         if (ret < 0)
8823                                 goto out;
8824                         continue;
8825                 }
8826                 found = 1;
8827                 leaf = path->nodes[0];
8828                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8829                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8830                         break;
8831                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8832                         path->slots[0]++;
8833                         continue;
8834                 }
8835                 if (!del_nr) {
8836                         del_slot = path->slots[0];
8837                         del_nr = 1;
8838                 } else {
8839                         del_nr++;
8840                 }
8841                 path->slots[0]++;
8842         }
8843
8844         if (del_nr) {
8845                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8846                 if (ret)
8847                         goto out;
8848         }
8849         btrfs_release_path(path);
8850
8851 reinit_data_reloc:
8852         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8853         key.type = BTRFS_ROOT_ITEM_KEY;
8854         key.offset = (u64)-1;
8855         root = btrfs_read_fs_root(fs_info, &key);
8856         if (IS_ERR(root)) {
8857                 fprintf(stderr, "Error reading data reloc tree\n");
8858                 ret = PTR_ERR(root);
8859                 goto out;
8860         }
8861         record_root_in_trans(trans, root);
8862         ret = btrfs_fsck_reinit_root(trans, root, 0);
8863         if (ret)
8864                 goto out;
8865         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8866 out:
8867         btrfs_free_path(path);
8868         return ret;
8869 }
8870
8871 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8872                               struct btrfs_fs_info *fs_info)
8873 {
8874         u64 start = 0;
8875         int ret;
8876
8877         /*
8878          * The only reason we don't do this is because right now we're just
8879          * walking the trees we find and pinning down their bytes, we don't look
8880          * at any of the leaves.  In order to do mixed groups we'd have to check
8881          * the leaves of any fs roots and pin down the bytes for any file
8882          * extents we find.  Not hard but why do it if we don't have to?
8883          */
8884         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8885                 fprintf(stderr, "We don't support re-initing the extent tree "
8886                         "for mixed block groups yet, please notify a btrfs "
8887                         "developer you want to do this so they can add this "
8888                         "functionality.\n");
8889                 return -EINVAL;
8890         }
8891
8892         /*
8893          * first we need to walk all of the trees except the extent tree and pin
8894          * down the bytes that are in use so we don't overwrite any existing
8895          * metadata.
8896          */
8897         ret = pin_metadata_blocks(fs_info);
8898         if (ret) {
8899                 fprintf(stderr, "error pinning down used bytes\n");
8900                 return ret;
8901         }
8902
8903         /*
8904          * Need to drop all the block groups since we're going to recreate all
8905          * of them again.
8906          */
8907         btrfs_free_block_groups(fs_info);
8908         ret = reset_block_groups(fs_info);
8909         if (ret) {
8910                 fprintf(stderr, "error resetting the block groups\n");
8911                 return ret;
8912         }
8913
8914         /* Ok we can allocate now, reinit the extent root */
8915         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8916         if (ret) {
8917                 fprintf(stderr, "extent root initialization failed\n");
8918                 /*
8919                  * When the transaction code is updated we should end the
8920                  * transaction, but for now progs only knows about commit so
8921                  * just return an error.
8922                  */
8923                 return ret;
8924         }
8925
8926         /*
8927          * Now we have all the in-memory block groups setup so we can make
8928          * allocations properly, and the metadata we care about is safe since we
8929          * pinned all of it above.
8930          */
8931         while (1) {
8932                 struct btrfs_block_group_cache *cache;
8933
8934                 cache = btrfs_lookup_first_block_group(fs_info, start);
8935                 if (!cache)
8936                         break;
8937                 start = cache->key.objectid + cache->key.offset;
8938                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8939                                         &cache->key, &cache->item,
8940                                         sizeof(cache->item));
8941                 if (ret) {
8942                         fprintf(stderr, "Error adding block group\n");
8943                         return ret;
8944                 }
8945                 btrfs_extent_post_op(trans, fs_info->extent_root);
8946         }
8947
8948         ret = reset_balance(trans, fs_info);
8949         if (ret)
8950                 fprintf(stderr, "error resetting the pending balance\n");
8951
8952         return ret;
8953 }
8954
8955 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8956 {
8957         struct btrfs_path *path;
8958         struct btrfs_trans_handle *trans;
8959         struct btrfs_key key;
8960         int ret;
8961
8962         printf("Recowing metadata block %llu\n", eb->start);
8963         key.objectid = btrfs_header_owner(eb);
8964         key.type = BTRFS_ROOT_ITEM_KEY;
8965         key.offset = (u64)-1;
8966
8967         root = btrfs_read_fs_root(root->fs_info, &key);
8968         if (IS_ERR(root)) {
8969                 fprintf(stderr, "Couldn't find owner root %llu\n",
8970                         key.objectid);
8971                 return PTR_ERR(root);
8972         }
8973
8974         path = btrfs_alloc_path();
8975         if (!path)
8976                 return -ENOMEM;
8977
8978         trans = btrfs_start_transaction(root, 1);
8979         if (IS_ERR(trans)) {
8980                 btrfs_free_path(path);
8981                 return PTR_ERR(trans);
8982         }
8983
8984         path->lowest_level = btrfs_header_level(eb);
8985         if (path->lowest_level)
8986                 btrfs_node_key_to_cpu(eb, &key, 0);
8987         else
8988                 btrfs_item_key_to_cpu(eb, &key, 0);
8989
8990         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8991         btrfs_commit_transaction(trans, root);
8992         btrfs_free_path(path);
8993         return ret;
8994 }
8995
8996 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8997 {
8998         struct btrfs_path *path;
8999         struct btrfs_trans_handle *trans;
9000         struct btrfs_key key;
9001         int ret;
9002
9003         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9004                bad->key.type, bad->key.offset);
9005         key.objectid = bad->root_id;
9006         key.type = BTRFS_ROOT_ITEM_KEY;
9007         key.offset = (u64)-1;
9008
9009         root = btrfs_read_fs_root(root->fs_info, &key);
9010         if (IS_ERR(root)) {
9011                 fprintf(stderr, "Couldn't find owner root %llu\n",
9012                         key.objectid);
9013                 return PTR_ERR(root);
9014         }
9015
9016         path = btrfs_alloc_path();
9017         if (!path)
9018                 return -ENOMEM;
9019
9020         trans = btrfs_start_transaction(root, 1);
9021         if (IS_ERR(trans)) {
9022                 btrfs_free_path(path);
9023                 return PTR_ERR(trans);
9024         }
9025
9026         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9027         if (ret) {
9028                 if (ret > 0)
9029                         ret = 0;
9030                 goto out;
9031         }
9032         ret = btrfs_del_item(trans, root, path);
9033 out:
9034         btrfs_commit_transaction(trans, root);
9035         btrfs_free_path(path);
9036         return ret;
9037 }
9038
9039 static int zero_log_tree(struct btrfs_root *root)
9040 {
9041         struct btrfs_trans_handle *trans;
9042         int ret;
9043
9044         trans = btrfs_start_transaction(root, 1);
9045         if (IS_ERR(trans)) {
9046                 ret = PTR_ERR(trans);
9047                 return ret;
9048         }
9049         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9050         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9051         ret = btrfs_commit_transaction(trans, root);
9052         return ret;
9053 }
9054
9055 static int populate_csum(struct btrfs_trans_handle *trans,
9056                          struct btrfs_root *csum_root, char *buf, u64 start,
9057                          u64 len)
9058 {
9059         u64 offset = 0;
9060         u64 sectorsize;
9061         int ret = 0;
9062
9063         while (offset < len) {
9064                 sectorsize = csum_root->sectorsize;
9065                 ret = read_extent_data(csum_root, buf, start + offset,
9066                                        &sectorsize, 0);
9067                 if (ret)
9068                         break;
9069                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9070                                             start + offset, buf, sectorsize);
9071                 if (ret)
9072                         break;
9073                 offset += sectorsize;
9074         }
9075         return ret;
9076 }
9077
9078 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9079                                       struct btrfs_root *csum_root,
9080                                       struct btrfs_root *cur_root)
9081 {
9082         struct btrfs_path *path;
9083         struct btrfs_key key;
9084         struct extent_buffer *node;
9085         struct btrfs_file_extent_item *fi;
9086         char *buf = NULL;
9087         u64 start = 0;
9088         u64 len = 0;
9089         int slot = 0;
9090         int ret = 0;
9091
9092         path = btrfs_alloc_path();
9093         if (!path)
9094                 return -ENOMEM;
9095         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9096         if (!buf) {
9097                 ret = -ENOMEM;
9098                 goto out;
9099         }
9100
9101         key.objectid = 0;
9102         key.offset = 0;
9103         key.type = 0;
9104
9105         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9106         if (ret < 0)
9107                 goto out;
9108         /* Iterate all regular file extents and fill its csum */
9109         while (1) {
9110                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9111
9112                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9113                         goto next;
9114                 node = path->nodes[0];
9115                 slot = path->slots[0];
9116                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9117                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9118                         goto next;
9119                 start = btrfs_file_extent_disk_bytenr(node, fi);
9120                 len = btrfs_file_extent_disk_num_bytes(node, fi);
9121
9122                 ret = populate_csum(trans, csum_root, buf, start, len);
9123                 if (ret == -EEXIST)
9124                         ret = 0;
9125                 if (ret < 0)
9126                         goto out;
9127 next:
9128                 /*
9129                  * TODO: if next leaf is corrupted, jump to nearest next valid
9130                  * leaf.
9131                  */
9132                 ret = btrfs_next_item(cur_root, path);
9133                 if (ret < 0)
9134                         goto out;
9135                 if (ret > 0) {
9136                         ret = 0;
9137                         goto out;
9138                 }
9139         }
9140
9141 out:
9142         btrfs_free_path(path);
9143         free(buf);
9144         return ret;
9145 }
9146
9147 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9148                                   struct btrfs_root *csum_root)
9149 {
9150         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9151         struct btrfs_path *path;
9152         struct btrfs_root *tree_root = fs_info->tree_root;
9153         struct btrfs_root *cur_root;
9154         struct extent_buffer *node;
9155         struct btrfs_key key;
9156         int slot = 0;
9157         int ret = 0;
9158
9159         path = btrfs_alloc_path();
9160         if (!path)
9161                 return -ENOMEM;
9162
9163         key.objectid = BTRFS_FS_TREE_OBJECTID;
9164         key.offset = 0;
9165         key.type = BTRFS_ROOT_ITEM_KEY;
9166
9167         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9168         if (ret < 0)
9169                 goto out;
9170         if (ret > 0) {
9171                 ret = -ENOENT;
9172                 goto out;
9173         }
9174
9175         while (1) {
9176                 node = path->nodes[0];
9177                 slot = path->slots[0];
9178                 btrfs_item_key_to_cpu(node, &key, slot);
9179                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9180                         goto out;
9181                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9182                         goto next;
9183                 if (!is_fstree(key.objectid))
9184                         goto next;
9185                 key.offset = (u64)-1;
9186
9187                 cur_root = btrfs_read_fs_root(fs_info, &key);
9188                 if (IS_ERR(cur_root) || !cur_root) {
9189                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9190                                 key.objectid);
9191                         goto out;
9192                 }
9193                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9194                                 cur_root);
9195                 if (ret < 0)
9196                         goto out;
9197 next:
9198                 ret = btrfs_next_item(tree_root, path);
9199                 if (ret > 0) {
9200                         ret = 0;
9201                         goto out;
9202                 }
9203                 if (ret < 0)
9204                         goto out;
9205         }
9206
9207 out:
9208         btrfs_free_path(path);
9209         return ret;
9210 }
9211
9212 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9213                                       struct btrfs_root *csum_root)
9214 {
9215         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9216         struct btrfs_path *path;
9217         struct btrfs_extent_item *ei;
9218         struct extent_buffer *leaf;
9219         char *buf;
9220         struct btrfs_key key;
9221         int ret;
9222
9223         path = btrfs_alloc_path();
9224         if (!path)
9225                 return -ENOMEM;
9226
9227         key.objectid = 0;
9228         key.type = BTRFS_EXTENT_ITEM_KEY;
9229         key.offset = 0;
9230
9231         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9232         if (ret < 0) {
9233                 btrfs_free_path(path);
9234                 return ret;
9235         }
9236
9237         buf = malloc(csum_root->sectorsize);
9238         if (!buf) {
9239                 btrfs_free_path(path);
9240                 return -ENOMEM;
9241         }
9242
9243         while (1) {
9244                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9245                         ret = btrfs_next_leaf(extent_root, path);
9246                         if (ret < 0)
9247                                 break;
9248                         if (ret) {
9249                                 ret = 0;
9250                                 break;
9251                         }
9252                 }
9253                 leaf = path->nodes[0];
9254
9255                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9256                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9257                         path->slots[0]++;
9258                         continue;
9259                 }
9260
9261                 ei = btrfs_item_ptr(leaf, path->slots[0],
9262                                     struct btrfs_extent_item);
9263                 if (!(btrfs_extent_flags(leaf, ei) &
9264                       BTRFS_EXTENT_FLAG_DATA)) {
9265                         path->slots[0]++;
9266                         continue;
9267                 }
9268
9269                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9270                                     key.offset);
9271                 if (ret)
9272                         break;
9273                 path->slots[0]++;
9274         }
9275
9276         btrfs_free_path(path);
9277         free(buf);
9278         return ret;
9279 }
9280
9281 /*
9282  * Recalculate the csum and put it into the csum tree.
9283  *
9284  * Extent tree init will wipe out all the extent info, so in that case, we
9285  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9286  * will use fs/subvol trees to init the csum tree.
9287  */
9288 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9289                           struct btrfs_root *csum_root,
9290                           int search_fs_tree)
9291 {
9292         if (search_fs_tree)
9293                 return fill_csum_tree_from_fs(trans, csum_root);
9294         else
9295                 return fill_csum_tree_from_extent(trans, csum_root);
9296 }
9297
9298 static void free_roots_info_cache(void)
9299 {
9300         if (!roots_info_cache)
9301                 return;
9302
9303         while (!cache_tree_empty(roots_info_cache)) {
9304                 struct cache_extent *entry;
9305                 struct root_item_info *rii;
9306
9307                 entry = first_cache_extent(roots_info_cache);
9308                 if (!entry)
9309                         break;
9310                 remove_cache_extent(roots_info_cache, entry);
9311                 rii = container_of(entry, struct root_item_info, cache_extent);
9312                 free(rii);
9313         }
9314
9315         free(roots_info_cache);
9316         roots_info_cache = NULL;
9317 }
9318
9319 static int build_roots_info_cache(struct btrfs_fs_info *info)
9320 {
9321         int ret = 0;
9322         struct btrfs_key key;
9323         struct extent_buffer *leaf;
9324         struct btrfs_path *path;
9325
9326         if (!roots_info_cache) {
9327                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9328                 if (!roots_info_cache)
9329                         return -ENOMEM;
9330                 cache_tree_init(roots_info_cache);
9331         }
9332
9333         path = btrfs_alloc_path();
9334         if (!path)
9335                 return -ENOMEM;
9336
9337         key.objectid = 0;
9338         key.type = BTRFS_EXTENT_ITEM_KEY;
9339         key.offset = 0;
9340
9341         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9342         if (ret < 0)
9343                 goto out;
9344         leaf = path->nodes[0];
9345
9346         while (1) {
9347                 struct btrfs_key found_key;
9348                 struct btrfs_extent_item *ei;
9349                 struct btrfs_extent_inline_ref *iref;
9350                 int slot = path->slots[0];
9351                 int type;
9352                 u64 flags;
9353                 u64 root_id;
9354                 u8 level;
9355                 struct cache_extent *entry;
9356                 struct root_item_info *rii;
9357
9358                 if (slot >= btrfs_header_nritems(leaf)) {
9359                         ret = btrfs_next_leaf(info->extent_root, path);
9360                         if (ret < 0) {
9361                                 break;
9362                         } else if (ret) {
9363                                 ret = 0;
9364                                 break;
9365                         }
9366                         leaf = path->nodes[0];
9367                         slot = path->slots[0];
9368                 }
9369
9370                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9371
9372                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9373                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9374                         goto next;
9375
9376                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9377                 flags = btrfs_extent_flags(leaf, ei);
9378
9379                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9380                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9381                         goto next;
9382
9383                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9384                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9385                         level = found_key.offset;
9386                 } else {
9387                         struct btrfs_tree_block_info *binfo;
9388
9389                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9390                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9391                         level = btrfs_tree_block_level(leaf, binfo);
9392                 }
9393
9394                 /*
9395                  * For a root extent, it must be of the following type and the
9396                  * first (and only one) iref in the item.
9397                  */
9398                 type = btrfs_extent_inline_ref_type(leaf, iref);
9399                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9400                         goto next;
9401
9402                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9403                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9404                 if (!entry) {
9405                         rii = malloc(sizeof(struct root_item_info));
9406                         if (!rii) {
9407                                 ret = -ENOMEM;
9408                                 goto out;
9409                         }
9410                         rii->cache_extent.start = root_id;
9411                         rii->cache_extent.size = 1;
9412                         rii->level = (u8)-1;
9413                         entry = &rii->cache_extent;
9414                         ret = insert_cache_extent(roots_info_cache, entry);
9415                         ASSERT(ret == 0);
9416                 } else {
9417                         rii = container_of(entry, struct root_item_info,
9418                                            cache_extent);
9419                 }
9420
9421                 ASSERT(rii->cache_extent.start == root_id);
9422                 ASSERT(rii->cache_extent.size == 1);
9423
9424                 if (level > rii->level || rii->level == (u8)-1) {
9425                         rii->level = level;
9426                         rii->bytenr = found_key.objectid;
9427                         rii->gen = btrfs_extent_generation(leaf, ei);
9428                         rii->node_count = 1;
9429                 } else if (level == rii->level) {
9430                         rii->node_count++;
9431                 }
9432 next:
9433                 path->slots[0]++;
9434         }
9435
9436 out:
9437         btrfs_free_path(path);
9438
9439         return ret;
9440 }
9441
9442 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9443                                   struct btrfs_path *path,
9444                                   const struct btrfs_key *root_key,
9445                                   const int read_only_mode)
9446 {
9447         const u64 root_id = root_key->objectid;
9448         struct cache_extent *entry;
9449         struct root_item_info *rii;
9450         struct btrfs_root_item ri;
9451         unsigned long offset;
9452
9453         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9454         if (!entry) {
9455                 fprintf(stderr,
9456                         "Error: could not find extent items for root %llu\n",
9457                         root_key->objectid);
9458                 return -ENOENT;
9459         }
9460
9461         rii = container_of(entry, struct root_item_info, cache_extent);
9462         ASSERT(rii->cache_extent.start == root_id);
9463         ASSERT(rii->cache_extent.size == 1);
9464
9465         if (rii->node_count != 1) {
9466                 fprintf(stderr,
9467                         "Error: could not find btree root extent for root %llu\n",
9468                         root_id);
9469                 return -ENOENT;
9470         }
9471
9472         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9473         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9474
9475         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9476             btrfs_root_level(&ri) != rii->level ||
9477             btrfs_root_generation(&ri) != rii->gen) {
9478
9479                 /*
9480                  * If we're in repair mode but our caller told us to not update
9481                  * the root item, i.e. just check if it needs to be updated, don't
9482                  * print this message, since the caller will call us again shortly
9483                  * for the same root item without read only mode (the caller will
9484                  * open a transaction first).
9485                  */
9486                 if (!(read_only_mode && repair))
9487                         fprintf(stderr,
9488                                 "%sroot item for root %llu,"
9489                                 " current bytenr %llu, current gen %llu, current level %u,"
9490                                 " new bytenr %llu, new gen %llu, new level %u\n",
9491                                 (read_only_mode ? "" : "fixing "),
9492                                 root_id,
9493                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9494                                 btrfs_root_level(&ri),
9495                                 rii->bytenr, rii->gen, rii->level);
9496
9497                 if (btrfs_root_generation(&ri) > rii->gen) {
9498                         fprintf(stderr,
9499                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9500                                 root_id, btrfs_root_generation(&ri), rii->gen);
9501                         return -EINVAL;
9502                 }
9503
9504                 if (!read_only_mode) {
9505                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9506                         btrfs_set_root_level(&ri, rii->level);
9507                         btrfs_set_root_generation(&ri, rii->gen);
9508                         write_extent_buffer(path->nodes[0], &ri,
9509                                             offset, sizeof(ri));
9510                 }
9511
9512                 return 1;
9513         }
9514
9515         return 0;
9516 }
9517
9518 /*
9519  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9520  * caused read-only snapshots to be corrupted if they were created at a moment
9521  * when the source subvolume/snapshot had orphan items. The issue was that the
9522  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9523  * node instead of the post orphan cleanup root node.
9524  * So this function, and its callees, just detects and fixes those cases. Even
9525  * though the regression was for read-only snapshots, this function applies to
9526  * any snapshot/subvolume root.
9527  * This must be run before any other repair code - not doing it so, makes other
9528  * repair code delete or modify backrefs in the extent tree for example, which
9529  * will result in an inconsistent fs after repairing the root items.
9530  */
9531 static int repair_root_items(struct btrfs_fs_info *info)
9532 {
9533         struct btrfs_path *path = NULL;
9534         struct btrfs_key key;
9535         struct extent_buffer *leaf;
9536         struct btrfs_trans_handle *trans = NULL;
9537         int ret = 0;
9538         int bad_roots = 0;
9539         int need_trans = 0;
9540
9541         ret = build_roots_info_cache(info);
9542         if (ret)
9543                 goto out;
9544
9545         path = btrfs_alloc_path();
9546         if (!path) {
9547                 ret = -ENOMEM;
9548                 goto out;
9549         }
9550
9551         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9552         key.type = BTRFS_ROOT_ITEM_KEY;
9553         key.offset = 0;
9554
9555 again:
9556         /*
9557          * Avoid opening and committing transactions if a leaf doesn't have
9558          * any root items that need to be fixed, so that we avoid rotating
9559          * backup roots unnecessarily.
9560          */
9561         if (need_trans) {
9562                 trans = btrfs_start_transaction(info->tree_root, 1);
9563                 if (IS_ERR(trans)) {
9564                         ret = PTR_ERR(trans);
9565                         goto out;
9566                 }
9567         }
9568
9569         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9570                                 0, trans ? 1 : 0);
9571         if (ret < 0)
9572                 goto out;
9573         leaf = path->nodes[0];
9574
9575         while (1) {
9576                 struct btrfs_key found_key;
9577
9578                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9579                         int no_more_keys = find_next_key(path, &key);
9580
9581                         btrfs_release_path(path);
9582                         if (trans) {
9583                                 ret = btrfs_commit_transaction(trans,
9584                                                                info->tree_root);
9585                                 trans = NULL;
9586                                 if (ret < 0)
9587                                         goto out;
9588                         }
9589                         need_trans = 0;
9590                         if (no_more_keys)
9591                                 break;
9592                         goto again;
9593                 }
9594
9595                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9596
9597                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9598                         goto next;
9599                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9600                         goto next;
9601
9602                 ret = maybe_repair_root_item(info, path, &found_key,
9603                                              trans ? 0 : 1);
9604                 if (ret < 0)
9605                         goto out;
9606                 if (ret) {
9607                         if (!trans && repair) {
9608                                 need_trans = 1;
9609                                 key = found_key;
9610                                 btrfs_release_path(path);
9611                                 goto again;
9612                         }
9613                         bad_roots++;
9614                 }
9615 next:
9616                 path->slots[0]++;
9617         }
9618         ret = 0;
9619 out:
9620         free_roots_info_cache();
9621         btrfs_free_path(path);
9622         if (trans)
9623                 btrfs_commit_transaction(trans, info->tree_root);
9624         if (ret < 0)
9625                 return ret;
9626
9627         return bad_roots;
9628 }
9629
9630 const char * const cmd_check_usage[] = {
9631         "btrfs check [options] <device>",
9632         "Check structural integrity of a filesystem (unmounted).",
9633         "Check structural integrity of an unmounted filesystem. Verify internal",
9634         "trees' consistency and item connectivity. In the repair mode try to",
9635         "fix the problems found.",
9636         "WARNING: the repair mode is considered dangerous",
9637         "",
9638         "-s|--super <superblock>     use this superblock copy",
9639         "-b|--backup                 use the first valid backup root copy",
9640         "--repair                    try to repair the filesystem",
9641         "--readonly                  run in read-only mode (default)",
9642         "--init-csum-tree            create a new CRC tree",
9643         "--init-extent-tree          create a new extent tree",
9644         "--check-data-csum           verify checksums of data blocks",
9645         "-Q|--qgroup-report           print a report on qgroup consistency",
9646         "-E|--subvol-extents <subvolid>",
9647         "                            print subvolume extents and sharing state",
9648         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9649         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9650         "-p|--progress               indicate progress",
9651         NULL
9652 };
9653
9654 int cmd_check(int argc, char **argv)
9655 {
9656         struct cache_tree root_cache;
9657         struct btrfs_root *root;
9658         struct btrfs_fs_info *info;
9659         u64 bytenr = 0;
9660         u64 subvolid = 0;
9661         u64 tree_root_bytenr = 0;
9662         u64 chunk_root_bytenr = 0;
9663         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9664         int ret;
9665         u64 num;
9666         int init_csum_tree = 0;
9667         int readonly = 0;
9668         int qgroup_report = 0;
9669         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9670
9671         while(1) {
9672                 int c;
9673                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9674                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9675                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9676                 static const struct option long_options[] = {
9677                         { "super", required_argument, NULL, 's' },
9678                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9679                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9680                         { "init-csum-tree", no_argument, NULL,
9681                                 GETOPT_VAL_INIT_CSUM },
9682                         { "init-extent-tree", no_argument, NULL,
9683                                 GETOPT_VAL_INIT_EXTENT },
9684                         { "check-data-csum", no_argument, NULL,
9685                                 GETOPT_VAL_CHECK_CSUM },
9686                         { "backup", no_argument, NULL, 'b' },
9687                         { "subvol-extents", required_argument, NULL, 'E' },
9688                         { "qgroup-report", no_argument, NULL, 'Q' },
9689                         { "tree-root", required_argument, NULL, 'r' },
9690                         { "chunk-root", required_argument, NULL,
9691                                 GETOPT_VAL_CHUNK_TREE },
9692                         { "progress", no_argument, NULL, 'p' },
9693                         { NULL, 0, NULL, 0}
9694                 };
9695
9696                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9697                 if (c < 0)
9698                         break;
9699                 switch(c) {
9700                         case 'a': /* ignored */ break;
9701                         case 'b':
9702                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9703                                 break;
9704                         case 's':
9705                                 num = arg_strtou64(optarg);
9706                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9707                                         fprintf(stderr,
9708                                                 "ERROR: super mirror should be less than: %d\n",
9709                                                 BTRFS_SUPER_MIRROR_MAX);
9710                                         exit(1);
9711                                 }
9712                                 bytenr = btrfs_sb_offset(((int)num));
9713                                 printf("using SB copy %llu, bytenr %llu\n", num,
9714                                        (unsigned long long)bytenr);
9715                                 break;
9716                         case 'Q':
9717                                 qgroup_report = 1;
9718                                 break;
9719                         case 'E':
9720                                 subvolid = arg_strtou64(optarg);
9721                                 break;
9722                         case 'r':
9723                                 tree_root_bytenr = arg_strtou64(optarg);
9724                                 break;
9725                         case GETOPT_VAL_CHUNK_TREE:
9726                                 chunk_root_bytenr = arg_strtou64(optarg);
9727                                 break;
9728                         case 'p':
9729                                 ctx.progress_enabled = true;
9730                                 break;
9731                         case '?':
9732                         case 'h':
9733                                 usage(cmd_check_usage);
9734                         case GETOPT_VAL_REPAIR:
9735                                 printf("enabling repair mode\n");
9736                                 repair = 1;
9737                                 ctree_flags |= OPEN_CTREE_WRITES;
9738                                 break;
9739                         case GETOPT_VAL_READONLY:
9740                                 readonly = 1;
9741                                 break;
9742                         case GETOPT_VAL_INIT_CSUM:
9743                                 printf("Creating a new CRC tree\n");
9744                                 init_csum_tree = 1;
9745                                 repair = 1;
9746                                 ctree_flags |= OPEN_CTREE_WRITES;
9747                                 break;
9748                         case GETOPT_VAL_INIT_EXTENT:
9749                                 init_extent_tree = 1;
9750                                 ctree_flags |= (OPEN_CTREE_WRITES |
9751                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9752                                 repair = 1;
9753                                 break;
9754                         case GETOPT_VAL_CHECK_CSUM:
9755                                 check_data_csum = 1;
9756                                 break;
9757                 }
9758         }
9759
9760         if (check_argc_exact(argc - optind, 1))
9761                 usage(cmd_check_usage);
9762
9763         if (ctx.progress_enabled) {
9764                 ctx.tp = TASK_NOTHING;
9765                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9766         }
9767
9768         /* This check is the only reason for --readonly to exist */
9769         if (readonly && repair) {
9770                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9771                 exit(1);
9772         }
9773
9774         radix_tree_init();
9775         cache_tree_init(&root_cache);
9776
9777         if((ret = check_mounted(argv[optind])) < 0) {
9778                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9779                 goto err_out;
9780         } else if(ret) {
9781                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9782                 ret = -EBUSY;
9783                 goto err_out;
9784         }
9785
9786         /* only allow partial opening under repair mode */
9787         if (repair)
9788                 ctree_flags |= OPEN_CTREE_PARTIAL;
9789
9790         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9791                                   chunk_root_bytenr, ctree_flags);
9792         if (!info) {
9793                 fprintf(stderr, "Couldn't open file system\n");
9794                 ret = -EIO;
9795                 goto err_out;
9796         }
9797
9798         global_info = info;
9799         root = info->fs_root;
9800
9801         /*
9802          * repair mode will force us to commit transaction which
9803          * will make us fail to load log tree when mounting.
9804          */
9805         if (repair && btrfs_super_log_root(info->super_copy)) {
9806                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9807                 if (!ret) {
9808                         ret = 1;
9809                         goto close_out;
9810                 }
9811                 ret = zero_log_tree(root);
9812                 if (ret) {
9813                         fprintf(stderr, "fail to zero log tree\n");
9814                         goto close_out;
9815                 }
9816         }
9817
9818         uuid_unparse(info->super_copy->fsid, uuidbuf);
9819         if (qgroup_report) {
9820                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9821                        uuidbuf);
9822                 ret = qgroup_verify_all(info);
9823                 if (ret == 0)
9824                         ret = report_qgroups(1);
9825                 goto close_out;
9826         }
9827         if (subvolid) {
9828                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9829                        subvolid, argv[optind], uuidbuf);
9830                 ret = print_extent_state(info, subvolid);
9831                 goto close_out;
9832         }
9833         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9834
9835         if (!extent_buffer_uptodate(info->tree_root->node) ||
9836             !extent_buffer_uptodate(info->dev_root->node) ||
9837             !extent_buffer_uptodate(info->chunk_root->node)) {
9838                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9839                 ret = -EIO;
9840                 goto close_out;
9841         }
9842
9843         if (init_extent_tree || init_csum_tree) {
9844                 struct btrfs_trans_handle *trans;
9845
9846                 trans = btrfs_start_transaction(info->extent_root, 0);
9847                 if (IS_ERR(trans)) {
9848                         fprintf(stderr, "Error starting transaction\n");
9849                         ret = PTR_ERR(trans);
9850                         goto close_out;
9851                 }
9852
9853                 if (init_extent_tree) {
9854                         printf("Creating a new extent tree\n");
9855                         ret = reinit_extent_tree(trans, info);
9856                         if (ret)
9857                                 goto close_out;
9858                 }
9859
9860                 if (init_csum_tree) {
9861                         fprintf(stderr, "Reinit crc root\n");
9862                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9863                         if (ret) {
9864                                 fprintf(stderr, "crc root initialization failed\n");
9865                                 ret = -EIO;
9866                                 goto close_out;
9867                         }
9868
9869                         ret = fill_csum_tree(trans, info->csum_root,
9870                                              init_extent_tree);
9871                         if (ret) {
9872                                 fprintf(stderr, "crc refilling failed\n");
9873                                 return -EIO;
9874                         }
9875                 }
9876                 /*
9877                  * Ok now we commit and run the normal fsck, which will add
9878                  * extent entries for all of the items it finds.
9879                  */
9880                 ret = btrfs_commit_transaction(trans, info->extent_root);
9881                 if (ret)
9882                         goto close_out;
9883         }
9884         if (!extent_buffer_uptodate(info->extent_root->node)) {
9885                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9886                 ret = -EIO;
9887                 goto close_out;
9888         }
9889         if (!extent_buffer_uptodate(info->csum_root->node)) {
9890                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9891                 ret = -EIO;
9892                 goto close_out;
9893         }
9894
9895         if (!ctx.progress_enabled)
9896                 fprintf(stderr, "checking extents\n");
9897         ret = check_chunks_and_extents(root);
9898         if (ret)
9899                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9900
9901         ret = repair_root_items(info);
9902         if (ret < 0)
9903                 goto close_out;
9904         if (repair) {
9905                 fprintf(stderr, "Fixed %d roots.\n", ret);
9906                 ret = 0;
9907         } else if (ret > 0) {
9908                 fprintf(stderr,
9909                        "Found %d roots with an outdated root item.\n",
9910                        ret);
9911                 fprintf(stderr,
9912                         "Please run a filesystem check with the option --repair to fix them.\n");
9913                 ret = 1;
9914                 goto close_out;
9915         }
9916
9917         if (!ctx.progress_enabled) {
9918                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9919                         fprintf(stderr, "checking free space tree\n");
9920                 else
9921                         fprintf(stderr, "checking free space cache\n");
9922         }
9923         ret = check_space_cache(root);
9924         if (ret)
9925                 goto out;
9926
9927         /*
9928          * We used to have to have these hole extents in between our real
9929          * extents so if we don't have this flag set we need to make sure there
9930          * are no gaps in the file extents for inodes, otherwise we can just
9931          * ignore it when this happens.
9932          */
9933         no_holes = btrfs_fs_incompat(root->fs_info,
9934                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9935         if (!ctx.progress_enabled)
9936                 fprintf(stderr, "checking fs roots\n");
9937         ret = check_fs_roots(root, &root_cache);
9938         if (ret)
9939                 goto out;
9940
9941         fprintf(stderr, "checking csums\n");
9942         ret = check_csums(root);
9943         if (ret)
9944                 goto out;
9945
9946         fprintf(stderr, "checking root refs\n");
9947         ret = check_root_refs(root, &root_cache);
9948         if (ret)
9949                 goto out;
9950
9951         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9952                 struct extent_buffer *eb;
9953
9954                 eb = list_first_entry(&root->fs_info->recow_ebs,
9955                                       struct extent_buffer, recow);
9956                 list_del_init(&eb->recow);
9957                 ret = recow_extent_buffer(root, eb);
9958                 if (ret)
9959                         break;
9960         }
9961
9962         while (!list_empty(&delete_items)) {
9963                 struct bad_item *bad;
9964
9965                 bad = list_first_entry(&delete_items, struct bad_item, list);
9966                 list_del_init(&bad->list);
9967                 if (repair)
9968                         ret = delete_bad_item(root, bad);
9969                 free(bad);
9970         }
9971
9972         if (info->quota_enabled) {
9973                 int err;
9974                 fprintf(stderr, "checking quota groups\n");
9975                 err = qgroup_verify_all(info);
9976                 if (err)
9977                         goto out;
9978         }
9979
9980         if (!list_empty(&root->fs_info->recow_ebs)) {
9981                 fprintf(stderr, "Transid errors in file system\n");
9982                 ret = 1;
9983         }
9984 out:
9985         /* Don't override original ret */
9986         if (ret)
9987                 report_qgroups(0);
9988         else
9989                 ret = report_qgroups(0);
9990         if (found_old_backref) { /*
9991                  * there was a disk format change when mixed
9992                  * backref was in testing tree. The old format
9993                  * existed about one week.
9994                  */
9995                 printf("\n * Found old mixed backref format. "
9996                        "The old format is not supported! *"
9997                        "\n * Please mount the FS in readonly mode, "
9998                        "backup data and re-format the FS. *\n\n");
9999                 ret = 1;
10000         }
10001         printf("found %llu bytes used err is %d\n",
10002                (unsigned long long)bytes_used, ret);
10003         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10004         printf("total tree bytes: %llu\n",
10005                (unsigned long long)total_btree_bytes);
10006         printf("total fs tree bytes: %llu\n",
10007                (unsigned long long)total_fs_tree_bytes);
10008         printf("total extent tree bytes: %llu\n",
10009                (unsigned long long)total_extent_tree_bytes);
10010         printf("btree space waste bytes: %llu\n",
10011                (unsigned long long)btree_space_waste);
10012         printf("file data blocks allocated: %llu\n referenced %llu\n",
10013                 (unsigned long long)data_bytes_allocated,
10014                 (unsigned long long)data_bytes_referenced);
10015
10016         free_qgroup_counts();
10017         free_root_recs_tree(&root_cache);
10018 close_out:
10019         close_ctree(root);
10020 err_out:
10021         if (ctx.progress_enabled)
10022                 task_deinit(ctx.info);
10023
10024         return ret;
10025 }