btrfs-progs: check: add helpers for converting between structures
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int repair = 0;
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
78 struct extent_backref {
79         struct list_head list;
80         unsigned int is_data:1;
81         unsigned int found_extent_tree:1;
82         unsigned int full_backref:1;
83         unsigned int found_ref:1;
84         unsigned int broken:1;
85 };
86
87 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
88 {
89         return list_entry(entry, struct extent_backref, list);
90 }
91
92 struct data_backref {
93         struct extent_backref node;
94         union {
95                 u64 parent;
96                 u64 root;
97         };
98         u64 owner;
99         u64 offset;
100         u64 disk_bytenr;
101         u64 bytes;
102         u64 ram_bytes;
103         u32 num_refs;
104         u32 found_ref;
105 };
106
107 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 {
109         return container_of(back, struct data_backref, node);
110 }
111
112 /*
113  * Much like data_backref, just removed the undetermined members
114  * and change it to use list_head.
115  * During extent scan, it is stored in root->orphan_data_extent.
116  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
117  */
118 struct orphan_data_extent {
119         struct list_head list;
120         u64 root;
121         u64 objectid;
122         u64 offset;
123         u64 disk_bytenr;
124         u64 disk_len;
125 };
126
127 struct tree_backref {
128         struct extent_backref node;
129         union {
130                 u64 parent;
131                 u64 root;
132         };
133 };
134
135 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
136 {
137         return container_of(back, struct tree_backref, node);
138 }
139
140 /* Explicit initialization for extent_record::flag_block_full_backref */
141 enum { FLAG_UNSET = 2 };
142
143 struct extent_record {
144         struct list_head backrefs;
145         struct list_head dups;
146         struct list_head list;
147         struct cache_extent cache;
148         struct btrfs_disk_key parent_key;
149         u64 start;
150         u64 max_size;
151         u64 nr;
152         u64 refs;
153         u64 extent_item_refs;
154         u64 generation;
155         u64 parent_generation;
156         u64 info_objectid;
157         u32 num_duplicates;
158         u8 info_level;
159         unsigned int flag_block_full_backref:2;
160         unsigned int found_rec:1;
161         unsigned int content_checked:1;
162         unsigned int owner_ref_checked:1;
163         unsigned int is_root:1;
164         unsigned int metadata:1;
165         unsigned int bad_full_backref:1;
166         unsigned int crossing_stripes:1;
167         unsigned int wrong_chunk_type:1;
168 };
169
170 static inline struct extent_record* to_extent_record(struct list_head *entry)
171 {
172         return container_of(entry, struct extent_record, list);
173 }
174
175 struct inode_backref {
176         struct list_head list;
177         unsigned int found_dir_item:1;
178         unsigned int found_dir_index:1;
179         unsigned int found_inode_ref:1;
180         unsigned int filetype:8;
181         int errors;
182         unsigned int ref_type;
183         u64 dir;
184         u64 index;
185         u16 namelen;
186         char name[0];
187 };
188
189 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
190 {
191         return list_entry(entry, struct inode_backref, list);
192 }
193
194 struct root_item_record {
195         struct list_head list;
196         u64 objectid;
197         u64 bytenr;
198         u64 last_snapshot;
199         u8 level;
200         u8 drop_level;
201         int level_size;
202         struct btrfs_key drop_key;
203 };
204
205 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
206 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
207 #define REF_ERR_NO_INODE_REF            (1 << 2)
208 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
209 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
210 #define REF_ERR_DUP_INODE_REF           (1 << 5)
211 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
212 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
213 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
214 #define REF_ERR_NO_ROOT_REF             (1 << 9)
215 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
216 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
217 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
218
219 struct file_extent_hole {
220         struct rb_node node;
221         u64 start;
222         u64 len;
223 };
224
225 struct inode_record {
226         struct list_head backrefs;
227         unsigned int checked:1;
228         unsigned int merging:1;
229         unsigned int found_inode_item:1;
230         unsigned int found_dir_item:1;
231         unsigned int found_file_extent:1;
232         unsigned int found_csum_item:1;
233         unsigned int some_csum_missing:1;
234         unsigned int nodatasum:1;
235         int errors;
236
237         u64 ino;
238         u32 nlink;
239         u32 imode;
240         u64 isize;
241         u64 nbytes;
242
243         u32 found_link;
244         u64 found_size;
245         u64 extent_start;
246         u64 extent_end;
247         struct rb_root holes;
248         struct list_head orphan_extents;
249
250         u32 refs;
251 };
252
253 #define I_ERR_NO_INODE_ITEM             (1 << 0)
254 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
255 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
256 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
257 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
258 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
259 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
260 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
261 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
262 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
263 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
264 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
265 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
266 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
267 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
268
269 struct root_backref {
270         struct list_head list;
271         unsigned int found_dir_item:1;
272         unsigned int found_dir_index:1;
273         unsigned int found_back_ref:1;
274         unsigned int found_forward_ref:1;
275         unsigned int reachable:1;
276         int errors;
277         u64 ref_root;
278         u64 dir;
279         u64 index;
280         u16 namelen;
281         char name[0];
282 };
283
284 static inline struct root_backref* to_root_backref(struct list_head *entry)
285 {
286         return list_entry(entry, struct root_backref, list);
287 }
288
289 struct root_record {
290         struct list_head backrefs;
291         struct cache_extent cache;
292         unsigned int found_root_item:1;
293         u64 objectid;
294         u32 found_ref;
295 };
296
297 struct ptr_node {
298         struct cache_extent cache;
299         void *data;
300 };
301
302 struct shared_node {
303         struct cache_extent cache;
304         struct cache_tree root_cache;
305         struct cache_tree inode_cache;
306         struct inode_record *current;
307         u32 refs;
308 };
309
310 struct block_info {
311         u64 start;
312         u32 size;
313 };
314
315 struct walk_control {
316         struct cache_tree shared;
317         struct shared_node *nodes[BTRFS_MAX_LEVEL];
318         int active_node;
319         int root_level;
320 };
321
322 struct bad_item {
323         struct btrfs_key key;
324         u64 root_id;
325         struct list_head list;
326 };
327
328 struct extent_entry {
329         u64 bytenr;
330         u64 bytes;
331         int count;
332         int broken;
333         struct list_head list;
334 };
335
336 struct root_item_info {
337         /* level of the root */
338         u8 level;
339         /* number of nodes at this level, must be 1 for a root */
340         int node_count;
341         u64 bytenr;
342         u64 gen;
343         struct cache_extent cache_extent;
344 };
345
346 static void *print_status_check(void *p)
347 {
348         struct task_ctx *priv = p;
349         const char work_indicator[] = { '.', 'o', 'O', 'o' };
350         uint32_t count = 0;
351         static char *task_position_string[] = {
352                 "checking extents",
353                 "checking free space cache",
354                 "checking fs roots",
355         };
356
357         task_period_start(priv->info, 1000 /* 1s */);
358
359         if (priv->tp == TASK_NOTHING)
360                 return NULL;
361
362         while (1) {
363                 printf("%s [%c]\r", task_position_string[priv->tp],
364                                 work_indicator[count % 4]);
365                 count++;
366                 fflush(stdout);
367                 task_period_wait(priv->info);
368         }
369         return NULL;
370 }
371
372 static int print_status_return(void *p)
373 {
374         printf("\n");
375         fflush(stdout);
376
377         return 0;
378 }
379
380 /* Compatible function to allow reuse of old codes */
381 static u64 first_extent_gap(struct rb_root *holes)
382 {
383         struct file_extent_hole *hole;
384
385         if (RB_EMPTY_ROOT(holes))
386                 return (u64)-1;
387
388         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
389         return hole->start;
390 }
391
392 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
393 {
394         struct file_extent_hole *hole1;
395         struct file_extent_hole *hole2;
396
397         hole1 = rb_entry(node1, struct file_extent_hole, node);
398         hole2 = rb_entry(node2, struct file_extent_hole, node);
399
400         if (hole1->start > hole2->start)
401                 return -1;
402         if (hole1->start < hole2->start)
403                 return 1;
404         /* Now hole1->start == hole2->start */
405         if (hole1->len >= hole2->len)
406                 /*
407                  * Hole 1 will be merge center
408                  * Same hole will be merged later
409                  */
410                 return -1;
411         /* Hole 2 will be merge center */
412         return 1;
413 }
414
415 /*
416  * Add a hole to the record
417  *
418  * This will do hole merge for copy_file_extent_holes(),
419  * which will ensure there won't be continuous holes.
420  */
421 static int add_file_extent_hole(struct rb_root *holes,
422                                 u64 start, u64 len)
423 {
424         struct file_extent_hole *hole;
425         struct file_extent_hole *prev = NULL;
426         struct file_extent_hole *next = NULL;
427
428         hole = malloc(sizeof(*hole));
429         if (!hole)
430                 return -ENOMEM;
431         hole->start = start;
432         hole->len = len;
433         /* Since compare will not return 0, no -EEXIST will happen */
434         rb_insert(holes, &hole->node, compare_hole);
435
436         /* simple merge with previous hole */
437         if (rb_prev(&hole->node))
438                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
439                                 node);
440         if (prev && prev->start + prev->len >= hole->start) {
441                 hole->len = hole->start + hole->len - prev->start;
442                 hole->start = prev->start;
443                 rb_erase(&prev->node, holes);
444                 free(prev);
445                 prev = NULL;
446         }
447
448         /* iterate merge with next holes */
449         while (1) {
450                 if (!rb_next(&hole->node))
451                         break;
452                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
453                                         node);
454                 if (hole->start + hole->len >= next->start) {
455                         if (hole->start + hole->len <= next->start + next->len)
456                                 hole->len = next->start + next->len -
457                                             hole->start;
458                         rb_erase(&next->node, holes);
459                         free(next);
460                         next = NULL;
461                 } else
462                         break;
463         }
464         return 0;
465 }
466
467 static int compare_hole_range(struct rb_node *node, void *data)
468 {
469         struct file_extent_hole *hole;
470         u64 start;
471
472         hole = (struct file_extent_hole *)data;
473         start = hole->start;
474
475         hole = rb_entry(node, struct file_extent_hole, node);
476         if (start < hole->start)
477                 return -1;
478         if (start >= hole->start && start < hole->start + hole->len)
479                 return 0;
480         return 1;
481 }
482
483 /*
484  * Delete a hole in the record
485  *
486  * This will do the hole split and is much restrict than add.
487  */
488 static int del_file_extent_hole(struct rb_root *holes,
489                                 u64 start, u64 len)
490 {
491         struct file_extent_hole *hole;
492         struct file_extent_hole tmp;
493         u64 prev_start = 0;
494         u64 prev_len = 0;
495         u64 next_start = 0;
496         u64 next_len = 0;
497         struct rb_node *node;
498         int have_prev = 0;
499         int have_next = 0;
500         int ret = 0;
501
502         tmp.start = start;
503         tmp.len = len;
504         node = rb_search(holes, &tmp, compare_hole_range, NULL);
505         if (!node)
506                 return -EEXIST;
507         hole = rb_entry(node, struct file_extent_hole, node);
508         if (start + len > hole->start + hole->len)
509                 return -EEXIST;
510
511         /*
512          * Now there will be no overlap, delete the hole and re-add the
513          * split(s) if they exists.
514          */
515         if (start > hole->start) {
516                 prev_start = hole->start;
517                 prev_len = start - hole->start;
518                 have_prev = 1;
519         }
520         if (hole->start + hole->len > start + len) {
521                 next_start = start + len;
522                 next_len = hole->start + hole->len - start - len;
523                 have_next = 1;
524         }
525         rb_erase(node, holes);
526         free(hole);
527         if (have_prev) {
528                 ret = add_file_extent_hole(holes, prev_start, prev_len);
529                 if (ret < 0)
530                         return ret;
531         }
532         if (have_next) {
533                 ret = add_file_extent_hole(holes, next_start, next_len);
534                 if (ret < 0)
535                         return ret;
536         }
537         return 0;
538 }
539
540 static int copy_file_extent_holes(struct rb_root *dst,
541                                   struct rb_root *src)
542 {
543         struct file_extent_hole *hole;
544         struct rb_node *node;
545         int ret = 0;
546
547         node = rb_first(src);
548         while (node) {
549                 hole = rb_entry(node, struct file_extent_hole, node);
550                 ret = add_file_extent_hole(dst, hole->start, hole->len);
551                 if (ret)
552                         break;
553                 node = rb_next(node);
554         }
555         return ret;
556 }
557
558 static void free_file_extent_holes(struct rb_root *holes)
559 {
560         struct rb_node *node;
561         struct file_extent_hole *hole;
562
563         node = rb_first(holes);
564         while (node) {
565                 hole = rb_entry(node, struct file_extent_hole, node);
566                 rb_erase(node, holes);
567                 free(hole);
568                 node = rb_first(holes);
569         }
570 }
571
572 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
573
574 static void record_root_in_trans(struct btrfs_trans_handle *trans,
575                                  struct btrfs_root *root)
576 {
577         if (root->last_trans != trans->transid) {
578                 root->track_dirty = 1;
579                 root->last_trans = trans->transid;
580                 root->commit_root = root->node;
581                 extent_buffer_get(root->node);
582         }
583 }
584
585 static u8 imode_to_type(u32 imode)
586 {
587 #define S_SHIFT 12
588         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
589                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
590                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
591                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
592                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
593                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
594                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
595                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
596         };
597
598         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
599 #undef S_SHIFT
600 }
601
602 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
603 {
604         struct device_record *rec1;
605         struct device_record *rec2;
606
607         rec1 = rb_entry(node1, struct device_record, node);
608         rec2 = rb_entry(node2, struct device_record, node);
609         if (rec1->devid > rec2->devid)
610                 return -1;
611         else if (rec1->devid < rec2->devid)
612                 return 1;
613         else
614                 return 0;
615 }
616
617 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
618 {
619         struct inode_record *rec;
620         struct inode_backref *backref;
621         struct inode_backref *orig;
622         struct inode_backref *tmp;
623         struct orphan_data_extent *src_orphan;
624         struct orphan_data_extent *dst_orphan;
625         size_t size;
626         int ret;
627
628         rec = malloc(sizeof(*rec));
629         if (!rec)
630                 return ERR_PTR(-ENOMEM);
631         memcpy(rec, orig_rec, sizeof(*rec));
632         rec->refs = 1;
633         INIT_LIST_HEAD(&rec->backrefs);
634         INIT_LIST_HEAD(&rec->orphan_extents);
635         rec->holes = RB_ROOT;
636
637         list_for_each_entry(orig, &orig_rec->backrefs, list) {
638                 size = sizeof(*orig) + orig->namelen + 1;
639                 backref = malloc(size);
640                 if (!backref) {
641                         ret = -ENOMEM;
642                         goto cleanup;
643                 }
644                 memcpy(backref, orig, size);
645                 list_add_tail(&backref->list, &rec->backrefs);
646         }
647         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
648                 dst_orphan = malloc(sizeof(*dst_orphan));
649                 if (!dst_orphan) {
650                         ret = -ENOMEM;
651                         goto cleanup;
652                 }
653                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
654                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
655         }
656         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
657         BUG_ON(ret < 0);
658
659         return rec;
660
661 cleanup:
662         if (!list_empty(&rec->backrefs))
663                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
664                         list_del(&orig->list);
665                         free(orig);
666                 }
667
668         if (!list_empty(&rec->orphan_extents))
669                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
670                         list_del(&orig->list);
671                         free(orig);
672                 }
673
674         free(rec);
675
676         return ERR_PTR(ret);
677 }
678
679 static void print_orphan_data_extents(struct list_head *orphan_extents,
680                                       u64 objectid)
681 {
682         struct orphan_data_extent *orphan;
683
684         if (list_empty(orphan_extents))
685                 return;
686         printf("The following data extent is lost in tree %llu:\n",
687                objectid);
688         list_for_each_entry(orphan, orphan_extents, list) {
689                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
690                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
691                        orphan->disk_len);
692         }
693 }
694
695 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
696 {
697         u64 root_objectid = root->root_key.objectid;
698         int errors = rec->errors;
699
700         if (!errors)
701                 return;
702         /* reloc root errors, we print its corresponding fs root objectid*/
703         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
704                 root_objectid = root->root_key.offset;
705                 fprintf(stderr, "reloc");
706         }
707         fprintf(stderr, "root %llu inode %llu errors %x",
708                 (unsigned long long) root_objectid,
709                 (unsigned long long) rec->ino, rec->errors);
710
711         if (errors & I_ERR_NO_INODE_ITEM)
712                 fprintf(stderr, ", no inode item");
713         if (errors & I_ERR_NO_ORPHAN_ITEM)
714                 fprintf(stderr, ", no orphan item");
715         if (errors & I_ERR_DUP_INODE_ITEM)
716                 fprintf(stderr, ", dup inode item");
717         if (errors & I_ERR_DUP_DIR_INDEX)
718                 fprintf(stderr, ", dup dir index");
719         if (errors & I_ERR_ODD_DIR_ITEM)
720                 fprintf(stderr, ", odd dir item");
721         if (errors & I_ERR_ODD_FILE_EXTENT)
722                 fprintf(stderr, ", odd file extent");
723         if (errors & I_ERR_BAD_FILE_EXTENT)
724                 fprintf(stderr, ", bad file extent");
725         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
726                 fprintf(stderr, ", file extent overlap");
727         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
728                 fprintf(stderr, ", file extent discount");
729         if (errors & I_ERR_DIR_ISIZE_WRONG)
730                 fprintf(stderr, ", dir isize wrong");
731         if (errors & I_ERR_FILE_NBYTES_WRONG)
732                 fprintf(stderr, ", nbytes wrong");
733         if (errors & I_ERR_ODD_CSUM_ITEM)
734                 fprintf(stderr, ", odd csum item");
735         if (errors & I_ERR_SOME_CSUM_MISSING)
736                 fprintf(stderr, ", some csum missing");
737         if (errors & I_ERR_LINK_COUNT_WRONG)
738                 fprintf(stderr, ", link count wrong");
739         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
740                 fprintf(stderr, ", orphan file extent");
741         fprintf(stderr, "\n");
742         /* Print the orphan extents if needed */
743         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
744                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
745
746         /* Print the holes if needed */
747         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
748                 struct file_extent_hole *hole;
749                 struct rb_node *node;
750                 int found = 0;
751
752                 node = rb_first(&rec->holes);
753                 fprintf(stderr, "Found file extent holes:\n");
754                 while (node) {
755                         found = 1;
756                         hole = rb_entry(node, struct file_extent_hole, node);
757                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
758                                 hole->start, hole->len);
759                         node = rb_next(node);
760                 }
761                 if (!found)
762                         fprintf(stderr, "\tstart: 0, len: %llu\n",
763                                 round_up(rec->isize, root->sectorsize));
764         }
765 }
766
767 static void print_ref_error(int errors)
768 {
769         if (errors & REF_ERR_NO_DIR_ITEM)
770                 fprintf(stderr, ", no dir item");
771         if (errors & REF_ERR_NO_DIR_INDEX)
772                 fprintf(stderr, ", no dir index");
773         if (errors & REF_ERR_NO_INODE_REF)
774                 fprintf(stderr, ", no inode ref");
775         if (errors & REF_ERR_DUP_DIR_ITEM)
776                 fprintf(stderr, ", dup dir item");
777         if (errors & REF_ERR_DUP_DIR_INDEX)
778                 fprintf(stderr, ", dup dir index");
779         if (errors & REF_ERR_DUP_INODE_REF)
780                 fprintf(stderr, ", dup inode ref");
781         if (errors & REF_ERR_INDEX_UNMATCH)
782                 fprintf(stderr, ", index mismatch");
783         if (errors & REF_ERR_FILETYPE_UNMATCH)
784                 fprintf(stderr, ", filetype mismatch");
785         if (errors & REF_ERR_NAME_TOO_LONG)
786                 fprintf(stderr, ", name too long");
787         if (errors & REF_ERR_NO_ROOT_REF)
788                 fprintf(stderr, ", no root ref");
789         if (errors & REF_ERR_NO_ROOT_BACKREF)
790                 fprintf(stderr, ", no root backref");
791         if (errors & REF_ERR_DUP_ROOT_REF)
792                 fprintf(stderr, ", dup root ref");
793         if (errors & REF_ERR_DUP_ROOT_BACKREF)
794                 fprintf(stderr, ", dup root backref");
795         fprintf(stderr, "\n");
796 }
797
798 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
799                                           u64 ino, int mod)
800 {
801         struct ptr_node *node;
802         struct cache_extent *cache;
803         struct inode_record *rec = NULL;
804         int ret;
805
806         cache = lookup_cache_extent(inode_cache, ino, 1);
807         if (cache) {
808                 node = container_of(cache, struct ptr_node, cache);
809                 rec = node->data;
810                 if (mod && rec->refs > 1) {
811                         node->data = clone_inode_rec(rec);
812                         if (IS_ERR(node->data))
813                                 return node->data;
814                         rec->refs--;
815                         rec = node->data;
816                 }
817         } else if (mod) {
818                 rec = calloc(1, sizeof(*rec));
819                 if (!rec)
820                         return ERR_PTR(-ENOMEM);
821                 rec->ino = ino;
822                 rec->extent_start = (u64)-1;
823                 rec->refs = 1;
824                 INIT_LIST_HEAD(&rec->backrefs);
825                 INIT_LIST_HEAD(&rec->orphan_extents);
826                 rec->holes = RB_ROOT;
827
828                 node = malloc(sizeof(*node));
829                 if (!node) {
830                         free(rec);
831                         return ERR_PTR(-ENOMEM);
832                 }
833                 node->cache.start = ino;
834                 node->cache.size = 1;
835                 node->data = rec;
836
837                 if (ino == BTRFS_FREE_INO_OBJECTID)
838                         rec->found_link = 1;
839
840                 ret = insert_cache_extent(inode_cache, &node->cache);
841                 if (ret)
842                         return ERR_PTR(-EEXIST);
843         }
844         return rec;
845 }
846
847 static void free_orphan_data_extents(struct list_head *orphan_extents)
848 {
849         struct orphan_data_extent *orphan;
850
851         while (!list_empty(orphan_extents)) {
852                 orphan = list_entry(orphan_extents->next,
853                                     struct orphan_data_extent, list);
854                 list_del(&orphan->list);
855                 free(orphan);
856         }
857 }
858
859 static void free_inode_rec(struct inode_record *rec)
860 {
861         struct inode_backref *backref;
862
863         if (--rec->refs > 0)
864                 return;
865
866         while (!list_empty(&rec->backrefs)) {
867                 backref = to_inode_backref(rec->backrefs.next);
868                 list_del(&backref->list);
869                 free(backref);
870         }
871         free_orphan_data_extents(&rec->orphan_extents);
872         free_file_extent_holes(&rec->holes);
873         free(rec);
874 }
875
876 static int can_free_inode_rec(struct inode_record *rec)
877 {
878         if (!rec->errors && rec->checked && rec->found_inode_item &&
879             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
880                 return 1;
881         return 0;
882 }
883
884 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
885                                  struct inode_record *rec)
886 {
887         struct cache_extent *cache;
888         struct inode_backref *tmp, *backref;
889         struct ptr_node *node;
890         unsigned char filetype;
891
892         if (!rec->found_inode_item)
893                 return;
894
895         filetype = imode_to_type(rec->imode);
896         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
897                 if (backref->found_dir_item && backref->found_dir_index) {
898                         if (backref->filetype != filetype)
899                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
900                         if (!backref->errors && backref->found_inode_ref &&
901                             rec->nlink == rec->found_link) {
902                                 list_del(&backref->list);
903                                 free(backref);
904                         }
905                 }
906         }
907
908         if (!rec->checked || rec->merging)
909                 return;
910
911         if (S_ISDIR(rec->imode)) {
912                 if (rec->found_size != rec->isize)
913                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
914                 if (rec->found_file_extent)
915                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
916         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
917                 if (rec->found_dir_item)
918                         rec->errors |= I_ERR_ODD_DIR_ITEM;
919                 if (rec->found_size != rec->nbytes)
920                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
921                 if (rec->nlink > 0 && !no_holes &&
922                     (rec->extent_end < rec->isize ||
923                      first_extent_gap(&rec->holes) < rec->isize))
924                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
925         }
926
927         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
928                 if (rec->found_csum_item && rec->nodatasum)
929                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
930                 if (rec->some_csum_missing && !rec->nodatasum)
931                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
932         }
933
934         BUG_ON(rec->refs != 1);
935         if (can_free_inode_rec(rec)) {
936                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
937                 node = container_of(cache, struct ptr_node, cache);
938                 BUG_ON(node->data != rec);
939                 remove_cache_extent(inode_cache, &node->cache);
940                 free(node);
941                 free_inode_rec(rec);
942         }
943 }
944
945 static int check_orphan_item(struct btrfs_root *root, u64 ino)
946 {
947         struct btrfs_path path;
948         struct btrfs_key key;
949         int ret;
950
951         key.objectid = BTRFS_ORPHAN_OBJECTID;
952         key.type = BTRFS_ORPHAN_ITEM_KEY;
953         key.offset = ino;
954
955         btrfs_init_path(&path);
956         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
957         btrfs_release_path(&path);
958         if (ret > 0)
959                 ret = -ENOENT;
960         return ret;
961 }
962
963 static int process_inode_item(struct extent_buffer *eb,
964                               int slot, struct btrfs_key *key,
965                               struct shared_node *active_node)
966 {
967         struct inode_record *rec;
968         struct btrfs_inode_item *item;
969
970         rec = active_node->current;
971         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
972         if (rec->found_inode_item) {
973                 rec->errors |= I_ERR_DUP_INODE_ITEM;
974                 return 1;
975         }
976         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
977         rec->nlink = btrfs_inode_nlink(eb, item);
978         rec->isize = btrfs_inode_size(eb, item);
979         rec->nbytes = btrfs_inode_nbytes(eb, item);
980         rec->imode = btrfs_inode_mode(eb, item);
981         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
982                 rec->nodatasum = 1;
983         rec->found_inode_item = 1;
984         if (rec->nlink == 0)
985                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
986         maybe_free_inode_rec(&active_node->inode_cache, rec);
987         return 0;
988 }
989
990 static struct inode_backref *get_inode_backref(struct inode_record *rec,
991                                                 const char *name,
992                                                 int namelen, u64 dir)
993 {
994         struct inode_backref *backref;
995
996         list_for_each_entry(backref, &rec->backrefs, list) {
997                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
998                         break;
999                 if (backref->dir != dir || backref->namelen != namelen)
1000                         continue;
1001                 if (memcmp(name, backref->name, namelen))
1002                         continue;
1003                 return backref;
1004         }
1005
1006         backref = malloc(sizeof(*backref) + namelen + 1);
1007         if (!backref)
1008                 return NULL;
1009         memset(backref, 0, sizeof(*backref));
1010         backref->dir = dir;
1011         backref->namelen = namelen;
1012         memcpy(backref->name, name, namelen);
1013         backref->name[namelen] = '\0';
1014         list_add_tail(&backref->list, &rec->backrefs);
1015         return backref;
1016 }
1017
1018 static int add_inode_backref(struct cache_tree *inode_cache,
1019                              u64 ino, u64 dir, u64 index,
1020                              const char *name, int namelen,
1021                              int filetype, int itemtype, int errors)
1022 {
1023         struct inode_record *rec;
1024         struct inode_backref *backref;
1025
1026         rec = get_inode_rec(inode_cache, ino, 1);
1027         BUG_ON(IS_ERR(rec));
1028         backref = get_inode_backref(rec, name, namelen, dir);
1029         BUG_ON(!backref);
1030         if (errors)
1031                 backref->errors |= errors;
1032         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1033                 if (backref->found_dir_index)
1034                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1035                 if (backref->found_inode_ref && backref->index != index)
1036                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1037                 if (backref->found_dir_item && backref->filetype != filetype)
1038                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1039
1040                 backref->index = index;
1041                 backref->filetype = filetype;
1042                 backref->found_dir_index = 1;
1043         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1044                 rec->found_link++;
1045                 if (backref->found_dir_item)
1046                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1047                 if (backref->found_dir_index && backref->filetype != filetype)
1048                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1049
1050                 backref->filetype = filetype;
1051                 backref->found_dir_item = 1;
1052         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1053                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1054                 if (backref->found_inode_ref)
1055                         backref->errors |= REF_ERR_DUP_INODE_REF;
1056                 if (backref->found_dir_index && backref->index != index)
1057                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1058                 else
1059                         backref->index = index;
1060
1061                 backref->ref_type = itemtype;
1062                 backref->found_inode_ref = 1;
1063         } else {
1064                 BUG_ON(1);
1065         }
1066
1067         maybe_free_inode_rec(inode_cache, rec);
1068         return 0;
1069 }
1070
1071 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1072                             struct cache_tree *dst_cache)
1073 {
1074         struct inode_backref *backref;
1075         u32 dir_count = 0;
1076         int ret = 0;
1077
1078         dst->merging = 1;
1079         list_for_each_entry(backref, &src->backrefs, list) {
1080                 if (backref->found_dir_index) {
1081                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1082                                         backref->index, backref->name,
1083                                         backref->namelen, backref->filetype,
1084                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1085                 }
1086                 if (backref->found_dir_item) {
1087                         dir_count++;
1088                         add_inode_backref(dst_cache, dst->ino,
1089                                         backref->dir, 0, backref->name,
1090                                         backref->namelen, backref->filetype,
1091                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1092                 }
1093                 if (backref->found_inode_ref) {
1094                         add_inode_backref(dst_cache, dst->ino,
1095                                         backref->dir, backref->index,
1096                                         backref->name, backref->namelen, 0,
1097                                         backref->ref_type, backref->errors);
1098                 }
1099         }
1100
1101         if (src->found_dir_item)
1102                 dst->found_dir_item = 1;
1103         if (src->found_file_extent)
1104                 dst->found_file_extent = 1;
1105         if (src->found_csum_item)
1106                 dst->found_csum_item = 1;
1107         if (src->some_csum_missing)
1108                 dst->some_csum_missing = 1;
1109         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1110                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1111                 if (ret < 0)
1112                         return ret;
1113         }
1114
1115         BUG_ON(src->found_link < dir_count);
1116         dst->found_link += src->found_link - dir_count;
1117         dst->found_size += src->found_size;
1118         if (src->extent_start != (u64)-1) {
1119                 if (dst->extent_start == (u64)-1) {
1120                         dst->extent_start = src->extent_start;
1121                         dst->extent_end = src->extent_end;
1122                 } else {
1123                         if (dst->extent_end > src->extent_start)
1124                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1125                         else if (dst->extent_end < src->extent_start) {
1126                                 ret = add_file_extent_hole(&dst->holes,
1127                                         dst->extent_end,
1128                                         src->extent_start - dst->extent_end);
1129                         }
1130                         if (dst->extent_end < src->extent_end)
1131                                 dst->extent_end = src->extent_end;
1132                 }
1133         }
1134
1135         dst->errors |= src->errors;
1136         if (src->found_inode_item) {
1137                 if (!dst->found_inode_item) {
1138                         dst->nlink = src->nlink;
1139                         dst->isize = src->isize;
1140                         dst->nbytes = src->nbytes;
1141                         dst->imode = src->imode;
1142                         dst->nodatasum = src->nodatasum;
1143                         dst->found_inode_item = 1;
1144                 } else {
1145                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1146                 }
1147         }
1148         dst->merging = 0;
1149
1150         return 0;
1151 }
1152
1153 static int splice_shared_node(struct shared_node *src_node,
1154                               struct shared_node *dst_node)
1155 {
1156         struct cache_extent *cache;
1157         struct ptr_node *node, *ins;
1158         struct cache_tree *src, *dst;
1159         struct inode_record *rec, *conflict;
1160         u64 current_ino = 0;
1161         int splice = 0;
1162         int ret;
1163
1164         if (--src_node->refs == 0)
1165                 splice = 1;
1166         if (src_node->current)
1167                 current_ino = src_node->current->ino;
1168
1169         src = &src_node->root_cache;
1170         dst = &dst_node->root_cache;
1171 again:
1172         cache = search_cache_extent(src, 0);
1173         while (cache) {
1174                 node = container_of(cache, struct ptr_node, cache);
1175                 rec = node->data;
1176                 cache = next_cache_extent(cache);
1177
1178                 if (splice) {
1179                         remove_cache_extent(src, &node->cache);
1180                         ins = node;
1181                 } else {
1182                         ins = malloc(sizeof(*ins));
1183                         BUG_ON(!ins);
1184                         ins->cache.start = node->cache.start;
1185                         ins->cache.size = node->cache.size;
1186                         ins->data = rec;
1187                         rec->refs++;
1188                 }
1189                 ret = insert_cache_extent(dst, &ins->cache);
1190                 if (ret == -EEXIST) {
1191                         conflict = get_inode_rec(dst, rec->ino, 1);
1192                         BUG_ON(IS_ERR(conflict));
1193                         merge_inode_recs(rec, conflict, dst);
1194                         if (rec->checked) {
1195                                 conflict->checked = 1;
1196                                 if (dst_node->current == conflict)
1197                                         dst_node->current = NULL;
1198                         }
1199                         maybe_free_inode_rec(dst, conflict);
1200                         free_inode_rec(rec);
1201                         free(ins);
1202                 } else {
1203                         BUG_ON(ret);
1204                 }
1205         }
1206
1207         if (src == &src_node->root_cache) {
1208                 src = &src_node->inode_cache;
1209                 dst = &dst_node->inode_cache;
1210                 goto again;
1211         }
1212
1213         if (current_ino > 0 && (!dst_node->current ||
1214             current_ino > dst_node->current->ino)) {
1215                 if (dst_node->current) {
1216                         dst_node->current->checked = 1;
1217                         maybe_free_inode_rec(dst, dst_node->current);
1218                 }
1219                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1220                 BUG_ON(IS_ERR(dst_node->current));
1221         }
1222         return 0;
1223 }
1224
1225 static void free_inode_ptr(struct cache_extent *cache)
1226 {
1227         struct ptr_node *node;
1228         struct inode_record *rec;
1229
1230         node = container_of(cache, struct ptr_node, cache);
1231         rec = node->data;
1232         free_inode_rec(rec);
1233         free(node);
1234 }
1235
1236 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1237
1238 static struct shared_node *find_shared_node(struct cache_tree *shared,
1239                                             u64 bytenr)
1240 {
1241         struct cache_extent *cache;
1242         struct shared_node *node;
1243
1244         cache = lookup_cache_extent(shared, bytenr, 1);
1245         if (cache) {
1246                 node = container_of(cache, struct shared_node, cache);
1247                 return node;
1248         }
1249         return NULL;
1250 }
1251
1252 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1253 {
1254         int ret;
1255         struct shared_node *node;
1256
1257         node = calloc(1, sizeof(*node));
1258         if (!node)
1259                 return -ENOMEM;
1260         node->cache.start = bytenr;
1261         node->cache.size = 1;
1262         cache_tree_init(&node->root_cache);
1263         cache_tree_init(&node->inode_cache);
1264         node->refs = refs;
1265
1266         ret = insert_cache_extent(shared, &node->cache);
1267
1268         return ret;
1269 }
1270
1271 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1272                              struct walk_control *wc, int level)
1273 {
1274         struct shared_node *node;
1275         struct shared_node *dest;
1276         int ret;
1277
1278         if (level == wc->active_node)
1279                 return 0;
1280
1281         BUG_ON(wc->active_node <= level);
1282         node = find_shared_node(&wc->shared, bytenr);
1283         if (!node) {
1284                 ret = add_shared_node(&wc->shared, bytenr, refs);
1285                 BUG_ON(ret);
1286                 node = find_shared_node(&wc->shared, bytenr);
1287                 wc->nodes[level] = node;
1288                 wc->active_node = level;
1289                 return 0;
1290         }
1291
1292         if (wc->root_level == wc->active_node &&
1293             btrfs_root_refs(&root->root_item) == 0) {
1294                 if (--node->refs == 0) {
1295                         free_inode_recs_tree(&node->root_cache);
1296                         free_inode_recs_tree(&node->inode_cache);
1297                         remove_cache_extent(&wc->shared, &node->cache);
1298                         free(node);
1299                 }
1300                 return 1;
1301         }
1302
1303         dest = wc->nodes[wc->active_node];
1304         splice_shared_node(node, dest);
1305         if (node->refs == 0) {
1306                 remove_cache_extent(&wc->shared, &node->cache);
1307                 free(node);
1308         }
1309         return 1;
1310 }
1311
1312 static int leave_shared_node(struct btrfs_root *root,
1313                              struct walk_control *wc, int level)
1314 {
1315         struct shared_node *node;
1316         struct shared_node *dest;
1317         int i;
1318
1319         if (level == wc->root_level)
1320                 return 0;
1321
1322         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1323                 if (wc->nodes[i])
1324                         break;
1325         }
1326         BUG_ON(i >= BTRFS_MAX_LEVEL);
1327
1328         node = wc->nodes[wc->active_node];
1329         wc->nodes[wc->active_node] = NULL;
1330         wc->active_node = i;
1331
1332         dest = wc->nodes[wc->active_node];
1333         if (wc->active_node < wc->root_level ||
1334             btrfs_root_refs(&root->root_item) > 0) {
1335                 BUG_ON(node->refs <= 1);
1336                 splice_shared_node(node, dest);
1337         } else {
1338                 BUG_ON(node->refs < 2);
1339                 node->refs--;
1340         }
1341         return 0;
1342 }
1343
1344 /*
1345  * Returns:
1346  * < 0 - on error
1347  * 1   - if the root with id child_root_id is a child of root parent_root_id
1348  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1349  *       has other root(s) as parent(s)
1350  * 2   - if the root child_root_id doesn't have any parent roots
1351  */
1352 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1353                          u64 child_root_id)
1354 {
1355         struct btrfs_path path;
1356         struct btrfs_key key;
1357         struct extent_buffer *leaf;
1358         int has_parent = 0;
1359         int ret;
1360
1361         btrfs_init_path(&path);
1362
1363         key.objectid = parent_root_id;
1364         key.type = BTRFS_ROOT_REF_KEY;
1365         key.offset = child_root_id;
1366         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1367                                 0, 0);
1368         if (ret < 0)
1369                 return ret;
1370         btrfs_release_path(&path);
1371         if (!ret)
1372                 return 1;
1373
1374         key.objectid = child_root_id;
1375         key.type = BTRFS_ROOT_BACKREF_KEY;
1376         key.offset = 0;
1377         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1378                                 0, 0);
1379         if (ret < 0)
1380                 goto out;
1381
1382         while (1) {
1383                 leaf = path.nodes[0];
1384                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1385                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1386                         if (ret)
1387                                 break;
1388                         leaf = path.nodes[0];
1389                 }
1390
1391                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1392                 if (key.objectid != child_root_id ||
1393                     key.type != BTRFS_ROOT_BACKREF_KEY)
1394                         break;
1395
1396                 has_parent = 1;
1397
1398                 if (key.offset == parent_root_id) {
1399                         btrfs_release_path(&path);
1400                         return 1;
1401                 }
1402
1403                 path.slots[0]++;
1404         }
1405 out:
1406         btrfs_release_path(&path);
1407         if (ret < 0)
1408                 return ret;
1409         return has_parent ? 0 : 2;
1410 }
1411
1412 static int process_dir_item(struct btrfs_root *root,
1413                             struct extent_buffer *eb,
1414                             int slot, struct btrfs_key *key,
1415                             struct shared_node *active_node)
1416 {
1417         u32 total;
1418         u32 cur = 0;
1419         u32 len;
1420         u32 name_len;
1421         u32 data_len;
1422         int error;
1423         int nritems = 0;
1424         int filetype;
1425         struct btrfs_dir_item *di;
1426         struct inode_record *rec;
1427         struct cache_tree *root_cache;
1428         struct cache_tree *inode_cache;
1429         struct btrfs_key location;
1430         char namebuf[BTRFS_NAME_LEN];
1431
1432         root_cache = &active_node->root_cache;
1433         inode_cache = &active_node->inode_cache;
1434         rec = active_node->current;
1435         rec->found_dir_item = 1;
1436
1437         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1438         total = btrfs_item_size_nr(eb, slot);
1439         while (cur < total) {
1440                 nritems++;
1441                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1442                 name_len = btrfs_dir_name_len(eb, di);
1443                 data_len = btrfs_dir_data_len(eb, di);
1444                 filetype = btrfs_dir_type(eb, di);
1445
1446                 rec->found_size += name_len;
1447                 if (name_len <= BTRFS_NAME_LEN) {
1448                         len = name_len;
1449                         error = 0;
1450                 } else {
1451                         len = BTRFS_NAME_LEN;
1452                         error = REF_ERR_NAME_TOO_LONG;
1453                 }
1454                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1455
1456                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1457                         add_inode_backref(inode_cache, location.objectid,
1458                                           key->objectid, key->offset, namebuf,
1459                                           len, filetype, key->type, error);
1460                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1461                         add_inode_backref(root_cache, location.objectid,
1462                                           key->objectid, key->offset,
1463                                           namebuf, len, filetype,
1464                                           key->type, error);
1465                 } else {
1466                         fprintf(stderr, "invalid location in dir item %u\n",
1467                                 location.type);
1468                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1469                                           key->objectid, key->offset, namebuf,
1470                                           len, filetype, key->type, error);
1471                 }
1472
1473                 len = sizeof(*di) + name_len + data_len;
1474                 di = (struct btrfs_dir_item *)((char *)di + len);
1475                 cur += len;
1476         }
1477         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1478                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1479
1480         return 0;
1481 }
1482
1483 static int process_inode_ref(struct extent_buffer *eb,
1484                              int slot, struct btrfs_key *key,
1485                              struct shared_node *active_node)
1486 {
1487         u32 total;
1488         u32 cur = 0;
1489         u32 len;
1490         u32 name_len;
1491         u64 index;
1492         int error;
1493         struct cache_tree *inode_cache;
1494         struct btrfs_inode_ref *ref;
1495         char namebuf[BTRFS_NAME_LEN];
1496
1497         inode_cache = &active_node->inode_cache;
1498
1499         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1500         total = btrfs_item_size_nr(eb, slot);
1501         while (cur < total) {
1502                 name_len = btrfs_inode_ref_name_len(eb, ref);
1503                 index = btrfs_inode_ref_index(eb, ref);
1504                 if (name_len <= BTRFS_NAME_LEN) {
1505                         len = name_len;
1506                         error = 0;
1507                 } else {
1508                         len = BTRFS_NAME_LEN;
1509                         error = REF_ERR_NAME_TOO_LONG;
1510                 }
1511                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1512                 add_inode_backref(inode_cache, key->objectid, key->offset,
1513                                   index, namebuf, len, 0, key->type, error);
1514
1515                 len = sizeof(*ref) + name_len;
1516                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1517                 cur += len;
1518         }
1519         return 0;
1520 }
1521
1522 static int process_inode_extref(struct extent_buffer *eb,
1523                                 int slot, struct btrfs_key *key,
1524                                 struct shared_node *active_node)
1525 {
1526         u32 total;
1527         u32 cur = 0;
1528         u32 len;
1529         u32 name_len;
1530         u64 index;
1531         u64 parent;
1532         int error;
1533         struct cache_tree *inode_cache;
1534         struct btrfs_inode_extref *extref;
1535         char namebuf[BTRFS_NAME_LEN];
1536
1537         inode_cache = &active_node->inode_cache;
1538
1539         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1540         total = btrfs_item_size_nr(eb, slot);
1541         while (cur < total) {
1542                 name_len = btrfs_inode_extref_name_len(eb, extref);
1543                 index = btrfs_inode_extref_index(eb, extref);
1544                 parent = btrfs_inode_extref_parent(eb, extref);
1545                 if (name_len <= BTRFS_NAME_LEN) {
1546                         len = name_len;
1547                         error = 0;
1548                 } else {
1549                         len = BTRFS_NAME_LEN;
1550                         error = REF_ERR_NAME_TOO_LONG;
1551                 }
1552                 read_extent_buffer(eb, namebuf,
1553                                    (unsigned long)(extref + 1), len);
1554                 add_inode_backref(inode_cache, key->objectid, parent,
1555                                   index, namebuf, len, 0, key->type, error);
1556
1557                 len = sizeof(*extref) + name_len;
1558                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1559                 cur += len;
1560         }
1561         return 0;
1562
1563 }
1564
1565 static int count_csum_range(struct btrfs_root *root, u64 start,
1566                             u64 len, u64 *found)
1567 {
1568         struct btrfs_key key;
1569         struct btrfs_path path;
1570         struct extent_buffer *leaf;
1571         int ret;
1572         size_t size;
1573         *found = 0;
1574         u64 csum_end;
1575         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1576
1577         btrfs_init_path(&path);
1578
1579         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1580         key.offset = start;
1581         key.type = BTRFS_EXTENT_CSUM_KEY;
1582
1583         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1584                                 &key, &path, 0, 0);
1585         if (ret < 0)
1586                 goto out;
1587         if (ret > 0 && path.slots[0] > 0) {
1588                 leaf = path.nodes[0];
1589                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1590                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1591                     key.type == BTRFS_EXTENT_CSUM_KEY)
1592                         path.slots[0]--;
1593         }
1594
1595         while (len > 0) {
1596                 leaf = path.nodes[0];
1597                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1598                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1599                         if (ret > 0)
1600                                 break;
1601                         else if (ret < 0)
1602                                 goto out;
1603                         leaf = path.nodes[0];
1604                 }
1605
1606                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1607                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1608                     key.type != BTRFS_EXTENT_CSUM_KEY)
1609                         break;
1610
1611                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1612                 if (key.offset >= start + len)
1613                         break;
1614
1615                 if (key.offset > start)
1616                         start = key.offset;
1617
1618                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1619                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1620                 if (csum_end > start) {
1621                         size = min(csum_end - start, len);
1622                         len -= size;
1623                         start += size;
1624                         *found += size;
1625                 }
1626
1627                 path.slots[0]++;
1628         }
1629 out:
1630         btrfs_release_path(&path);
1631         if (ret < 0)
1632                 return ret;
1633         return 0;
1634 }
1635
1636 static int process_file_extent(struct btrfs_root *root,
1637                                 struct extent_buffer *eb,
1638                                 int slot, struct btrfs_key *key,
1639                                 struct shared_node *active_node)
1640 {
1641         struct inode_record *rec;
1642         struct btrfs_file_extent_item *fi;
1643         u64 num_bytes = 0;
1644         u64 disk_bytenr = 0;
1645         u64 extent_offset = 0;
1646         u64 mask = root->sectorsize - 1;
1647         int extent_type;
1648         int ret;
1649
1650         rec = active_node->current;
1651         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1652         rec->found_file_extent = 1;
1653
1654         if (rec->extent_start == (u64)-1) {
1655                 rec->extent_start = key->offset;
1656                 rec->extent_end = key->offset;
1657         }
1658
1659         if (rec->extent_end > key->offset)
1660                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1661         else if (rec->extent_end < key->offset) {
1662                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1663                                            key->offset - rec->extent_end);
1664                 if (ret < 0)
1665                         return ret;
1666         }
1667
1668         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1669         extent_type = btrfs_file_extent_type(eb, fi);
1670
1671         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1672                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1673                 if (num_bytes == 0)
1674                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1675                 rec->found_size += num_bytes;
1676                 num_bytes = (num_bytes + mask) & ~mask;
1677         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1678                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1679                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1680                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1681                 extent_offset = btrfs_file_extent_offset(eb, fi);
1682                 if (num_bytes == 0 || (num_bytes & mask))
1683                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1684                 if (num_bytes + extent_offset >
1685                     btrfs_file_extent_ram_bytes(eb, fi))
1686                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1687                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1688                     (btrfs_file_extent_compression(eb, fi) ||
1689                      btrfs_file_extent_encryption(eb, fi) ||
1690                      btrfs_file_extent_other_encoding(eb, fi)))
1691                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1692                 if (disk_bytenr > 0)
1693                         rec->found_size += num_bytes;
1694         } else {
1695                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1696         }
1697         rec->extent_end = key->offset + num_bytes;
1698
1699         /*
1700          * The data reloc tree will copy full extents into its inode and then
1701          * copy the corresponding csums.  Because the extent it copied could be
1702          * a preallocated extent that hasn't been written to yet there may be no
1703          * csums to copy, ergo we won't have csums for our file extent.  This is
1704          * ok so just don't bother checking csums if the inode belongs to the
1705          * data reloc tree.
1706          */
1707         if (disk_bytenr > 0 &&
1708             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1709                 u64 found;
1710                 if (btrfs_file_extent_compression(eb, fi))
1711                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1712                 else
1713                         disk_bytenr += extent_offset;
1714
1715                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1716                 if (ret < 0)
1717                         return ret;
1718                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1719                         if (found > 0)
1720                                 rec->found_csum_item = 1;
1721                         if (found < num_bytes)
1722                                 rec->some_csum_missing = 1;
1723                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1724                         if (found > 0)
1725                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1726                 }
1727         }
1728         return 0;
1729 }
1730
1731 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1732                             struct walk_control *wc)
1733 {
1734         struct btrfs_key key;
1735         u32 nritems;
1736         int i;
1737         int ret = 0;
1738         struct cache_tree *inode_cache;
1739         struct shared_node *active_node;
1740
1741         if (wc->root_level == wc->active_node &&
1742             btrfs_root_refs(&root->root_item) == 0)
1743                 return 0;
1744
1745         active_node = wc->nodes[wc->active_node];
1746         inode_cache = &active_node->inode_cache;
1747         nritems = btrfs_header_nritems(eb);
1748         for (i = 0; i < nritems; i++) {
1749                 btrfs_item_key_to_cpu(eb, &key, i);
1750
1751                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1752                         continue;
1753                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1754                         continue;
1755
1756                 if (active_node->current == NULL ||
1757                     active_node->current->ino < key.objectid) {
1758                         if (active_node->current) {
1759                                 active_node->current->checked = 1;
1760                                 maybe_free_inode_rec(inode_cache,
1761                                                      active_node->current);
1762                         }
1763                         active_node->current = get_inode_rec(inode_cache,
1764                                                              key.objectid, 1);
1765                         BUG_ON(IS_ERR(active_node->current));
1766                 }
1767                 switch (key.type) {
1768                 case BTRFS_DIR_ITEM_KEY:
1769                 case BTRFS_DIR_INDEX_KEY:
1770                         ret = process_dir_item(root, eb, i, &key, active_node);
1771                         break;
1772                 case BTRFS_INODE_REF_KEY:
1773                         ret = process_inode_ref(eb, i, &key, active_node);
1774                         break;
1775                 case BTRFS_INODE_EXTREF_KEY:
1776                         ret = process_inode_extref(eb, i, &key, active_node);
1777                         break;
1778                 case BTRFS_INODE_ITEM_KEY:
1779                         ret = process_inode_item(eb, i, &key, active_node);
1780                         break;
1781                 case BTRFS_EXTENT_DATA_KEY:
1782                         ret = process_file_extent(root, eb, i, &key,
1783                                                   active_node);
1784                         break;
1785                 default:
1786                         break;
1787                 };
1788         }
1789         return ret;
1790 }
1791
1792 static void reada_walk_down(struct btrfs_root *root,
1793                             struct extent_buffer *node, int slot)
1794 {
1795         u64 bytenr;
1796         u64 ptr_gen;
1797         u32 nritems;
1798         u32 blocksize;
1799         int i;
1800         int level;
1801
1802         level = btrfs_header_level(node);
1803         if (level != 1)
1804                 return;
1805
1806         nritems = btrfs_header_nritems(node);
1807         blocksize = root->nodesize;
1808         for (i = slot; i < nritems; i++) {
1809                 bytenr = btrfs_node_blockptr(node, i);
1810                 ptr_gen = btrfs_node_ptr_generation(node, i);
1811                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1812         }
1813 }
1814
1815 /*
1816  * Check the child node/leaf by the following condition:
1817  * 1. the first item key of the node/leaf should be the same with the one
1818  *    in parent.
1819  * 2. block in parent node should match the child node/leaf.
1820  * 3. generation of parent node and child's header should be consistent.
1821  *
1822  * Or the child node/leaf pointed by the key in parent is not valid.
1823  *
1824  * We hope to check leaf owner too, but since subvol may share leaves,
1825  * which makes leaf owner check not so strong, key check should be
1826  * sufficient enough for that case.
1827  */
1828 static int check_child_node(struct btrfs_root *root,
1829                             struct extent_buffer *parent, int slot,
1830                             struct extent_buffer *child)
1831 {
1832         struct btrfs_key parent_key;
1833         struct btrfs_key child_key;
1834         int ret = 0;
1835
1836         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1837         if (btrfs_header_level(child) == 0)
1838                 btrfs_item_key_to_cpu(child, &child_key, 0);
1839         else
1840                 btrfs_node_key_to_cpu(child, &child_key, 0);
1841
1842         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1843                 ret = -EINVAL;
1844                 fprintf(stderr,
1845                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1846                         parent_key.objectid, parent_key.type, parent_key.offset,
1847                         child_key.objectid, child_key.type, child_key.offset);
1848         }
1849         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1850                 ret = -EINVAL;
1851                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1852                         btrfs_node_blockptr(parent, slot),
1853                         btrfs_header_bytenr(child));
1854         }
1855         if (btrfs_node_ptr_generation(parent, slot) !=
1856             btrfs_header_generation(child)) {
1857                 ret = -EINVAL;
1858                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1859                         btrfs_header_generation(child),
1860                         btrfs_node_ptr_generation(parent, slot));
1861         }
1862         return ret;
1863 }
1864
1865 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1866                           struct walk_control *wc, int *level)
1867 {
1868         enum btrfs_tree_block_status status;
1869         u64 bytenr;
1870         u64 ptr_gen;
1871         struct extent_buffer *next;
1872         struct extent_buffer *cur;
1873         u32 blocksize;
1874         int ret, err = 0;
1875         u64 refs;
1876
1877         WARN_ON(*level < 0);
1878         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1879         ret = btrfs_lookup_extent_info(NULL, root,
1880                                        path->nodes[*level]->start,
1881                                        *level, 1, &refs, NULL);
1882         if (ret < 0) {
1883                 err = ret;
1884                 goto out;
1885         }
1886
1887         if (refs > 1) {
1888                 ret = enter_shared_node(root, path->nodes[*level]->start,
1889                                         refs, wc, *level);
1890                 if (ret > 0) {
1891                         err = ret;
1892                         goto out;
1893                 }
1894         }
1895
1896         while (*level >= 0) {
1897                 WARN_ON(*level < 0);
1898                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1899                 cur = path->nodes[*level];
1900
1901                 if (btrfs_header_level(cur) != *level)
1902                         WARN_ON(1);
1903
1904                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1905                         break;
1906                 if (*level == 0) {
1907                         ret = process_one_leaf(root, cur, wc);
1908                         if (ret < 0)
1909                                 err = ret;
1910                         break;
1911                 }
1912                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1913                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1914                 blocksize = root->nodesize;
1915                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1916                                                1, &refs, NULL);
1917                 if (ret < 0)
1918                         refs = 0;
1919
1920                 if (refs > 1) {
1921                         ret = enter_shared_node(root, bytenr, refs,
1922                                                 wc, *level - 1);
1923                         if (ret > 0) {
1924                                 path->slots[*level]++;
1925                                 continue;
1926                         }
1927                 }
1928
1929                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1930                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1931                         free_extent_buffer(next);
1932                         reada_walk_down(root, cur, path->slots[*level]);
1933                         next = read_tree_block(root, bytenr, blocksize,
1934                                                ptr_gen);
1935                         if (!extent_buffer_uptodate(next)) {
1936                                 struct btrfs_key node_key;
1937
1938                                 btrfs_node_key_to_cpu(path->nodes[*level],
1939                                                       &node_key,
1940                                                       path->slots[*level]);
1941                                 btrfs_add_corrupt_extent_record(root->fs_info,
1942                                                 &node_key,
1943                                                 path->nodes[*level]->start,
1944                                                 root->nodesize, *level);
1945                                 err = -EIO;
1946                                 goto out;
1947                         }
1948                 }
1949
1950                 ret = check_child_node(root, cur, path->slots[*level], next);
1951                 if (ret) {
1952                         err = ret;
1953                         goto out;
1954                 }
1955
1956                 if (btrfs_is_leaf(next))
1957                         status = btrfs_check_leaf(root, NULL, next);
1958                 else
1959                         status = btrfs_check_node(root, NULL, next);
1960                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1961                         free_extent_buffer(next);
1962                         err = -EIO;
1963                         goto out;
1964                 }
1965
1966                 *level = *level - 1;
1967                 free_extent_buffer(path->nodes[*level]);
1968                 path->nodes[*level] = next;
1969                 path->slots[*level] = 0;
1970         }
1971 out:
1972         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1973         return err;
1974 }
1975
1976 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1977                         struct walk_control *wc, int *level)
1978 {
1979         int i;
1980         struct extent_buffer *leaf;
1981
1982         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1983                 leaf = path->nodes[i];
1984                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1985                         path->slots[i]++;
1986                         *level = i;
1987                         return 0;
1988                 } else {
1989                         free_extent_buffer(path->nodes[*level]);
1990                         path->nodes[*level] = NULL;
1991                         BUG_ON(*level > wc->active_node);
1992                         if (*level == wc->active_node)
1993                                 leave_shared_node(root, wc, *level);
1994                         *level = i + 1;
1995                 }
1996         }
1997         return 1;
1998 }
1999
2000 static int check_root_dir(struct inode_record *rec)
2001 {
2002         struct inode_backref *backref;
2003         int ret = -1;
2004
2005         if (!rec->found_inode_item || rec->errors)
2006                 goto out;
2007         if (rec->nlink != 1 || rec->found_link != 0)
2008                 goto out;
2009         if (list_empty(&rec->backrefs))
2010                 goto out;
2011         backref = to_inode_backref(rec->backrefs.next);
2012         if (!backref->found_inode_ref)
2013                 goto out;
2014         if (backref->index != 0 || backref->namelen != 2 ||
2015             memcmp(backref->name, "..", 2))
2016                 goto out;
2017         if (backref->found_dir_index || backref->found_dir_item)
2018                 goto out;
2019         ret = 0;
2020 out:
2021         return ret;
2022 }
2023
2024 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2025                               struct btrfs_root *root, struct btrfs_path *path,
2026                               struct inode_record *rec)
2027 {
2028         struct btrfs_inode_item *ei;
2029         struct btrfs_key key;
2030         int ret;
2031
2032         key.objectid = rec->ino;
2033         key.type = BTRFS_INODE_ITEM_KEY;
2034         key.offset = (u64)-1;
2035
2036         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2037         if (ret < 0)
2038                 goto out;
2039         if (ret) {
2040                 if (!path->slots[0]) {
2041                         ret = -ENOENT;
2042                         goto out;
2043                 }
2044                 path->slots[0]--;
2045                 ret = 0;
2046         }
2047         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2048         if (key.objectid != rec->ino) {
2049                 ret = -ENOENT;
2050                 goto out;
2051         }
2052
2053         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2054                             struct btrfs_inode_item);
2055         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2056         btrfs_mark_buffer_dirty(path->nodes[0]);
2057         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2058         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2059                root->root_key.objectid);
2060 out:
2061         btrfs_release_path(path);
2062         return ret;
2063 }
2064
2065 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2066                                     struct btrfs_root *root,
2067                                     struct btrfs_path *path,
2068                                     struct inode_record *rec)
2069 {
2070         int ret;
2071
2072         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2073         btrfs_release_path(path);
2074         if (!ret)
2075                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2076         return ret;
2077 }
2078
2079 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2080                                struct btrfs_root *root,
2081                                struct btrfs_path *path,
2082                                struct inode_record *rec)
2083 {
2084         struct btrfs_inode_item *ei;
2085         struct btrfs_key key;
2086         int ret = 0;
2087
2088         key.objectid = rec->ino;
2089         key.type = BTRFS_INODE_ITEM_KEY;
2090         key.offset = 0;
2091
2092         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2093         if (ret) {
2094                 if (ret > 0)
2095                         ret = -ENOENT;
2096                 goto out;
2097         }
2098
2099         /* Since ret == 0, no need to check anything */
2100         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2101                             struct btrfs_inode_item);
2102         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2103         btrfs_mark_buffer_dirty(path->nodes[0]);
2104         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2105         printf("reset nbytes for ino %llu root %llu\n",
2106                rec->ino, root->root_key.objectid);
2107 out:
2108         btrfs_release_path(path);
2109         return ret;
2110 }
2111
2112 static int add_missing_dir_index(struct btrfs_root *root,
2113                                  struct cache_tree *inode_cache,
2114                                  struct inode_record *rec,
2115                                  struct inode_backref *backref)
2116 {
2117         struct btrfs_path *path;
2118         struct btrfs_trans_handle *trans;
2119         struct btrfs_dir_item *dir_item;
2120         struct extent_buffer *leaf;
2121         struct btrfs_key key;
2122         struct btrfs_disk_key disk_key;
2123         struct inode_record *dir_rec;
2124         unsigned long name_ptr;
2125         u32 data_size = sizeof(*dir_item) + backref->namelen;
2126         int ret;
2127
2128         path = btrfs_alloc_path();
2129         if (!path)
2130                 return -ENOMEM;
2131
2132         trans = btrfs_start_transaction(root, 1);
2133         if (IS_ERR(trans)) {
2134                 btrfs_free_path(path);
2135                 return PTR_ERR(trans);
2136         }
2137
2138         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2139                 (unsigned long long)rec->ino);
2140         key.objectid = backref->dir;
2141         key.type = BTRFS_DIR_INDEX_KEY;
2142         key.offset = backref->index;
2143
2144         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2145         BUG_ON(ret);
2146
2147         leaf = path->nodes[0];
2148         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2149
2150         disk_key.objectid = cpu_to_le64(rec->ino);
2151         disk_key.type = BTRFS_INODE_ITEM_KEY;
2152         disk_key.offset = 0;
2153
2154         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2155         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2156         btrfs_set_dir_data_len(leaf, dir_item, 0);
2157         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2158         name_ptr = (unsigned long)(dir_item + 1);
2159         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2160         btrfs_mark_buffer_dirty(leaf);
2161         btrfs_free_path(path);
2162         btrfs_commit_transaction(trans, root);
2163
2164         backref->found_dir_index = 1;
2165         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2166         BUG_ON(IS_ERR(dir_rec));
2167         if (!dir_rec)
2168                 return 0;
2169         dir_rec->found_size += backref->namelen;
2170         if (dir_rec->found_size == dir_rec->isize &&
2171             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2172                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2173         if (dir_rec->found_size != dir_rec->isize)
2174                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2175
2176         return 0;
2177 }
2178
2179 static int delete_dir_index(struct btrfs_root *root,
2180                             struct cache_tree *inode_cache,
2181                             struct inode_record *rec,
2182                             struct inode_backref *backref)
2183 {
2184         struct btrfs_trans_handle *trans;
2185         struct btrfs_dir_item *di;
2186         struct btrfs_path *path;
2187         int ret = 0;
2188
2189         path = btrfs_alloc_path();
2190         if (!path)
2191                 return -ENOMEM;
2192
2193         trans = btrfs_start_transaction(root, 1);
2194         if (IS_ERR(trans)) {
2195                 btrfs_free_path(path);
2196                 return PTR_ERR(trans);
2197         }
2198
2199
2200         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2201                 (unsigned long long)backref->dir,
2202                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2203                 (unsigned long long)root->objectid);
2204
2205         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2206                                     backref->name, backref->namelen,
2207                                     backref->index, -1);
2208         if (IS_ERR(di)) {
2209                 ret = PTR_ERR(di);
2210                 btrfs_free_path(path);
2211                 btrfs_commit_transaction(trans, root);
2212                 if (ret == -ENOENT)
2213                         return 0;
2214                 return ret;
2215         }
2216
2217         if (!di)
2218                 ret = btrfs_del_item(trans, root, path);
2219         else
2220                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2221         BUG_ON(ret);
2222         btrfs_free_path(path);
2223         btrfs_commit_transaction(trans, root);
2224         return ret;
2225 }
2226
2227 static int create_inode_item(struct btrfs_root *root,
2228                              struct inode_record *rec,
2229                              struct inode_backref *backref, int root_dir)
2230 {
2231         struct btrfs_trans_handle *trans;
2232         struct btrfs_inode_item inode_item;
2233         time_t now = time(NULL);
2234         int ret;
2235
2236         trans = btrfs_start_transaction(root, 1);
2237         if (IS_ERR(trans)) {
2238                 ret = PTR_ERR(trans);
2239                 return ret;
2240         }
2241
2242         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2243                 "be incomplete, please check permissions and content after "
2244                 "the fsck completes.\n", (unsigned long long)root->objectid,
2245                 (unsigned long long)rec->ino);
2246
2247         memset(&inode_item, 0, sizeof(inode_item));
2248         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2249         if (root_dir)
2250                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2251         else
2252                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2253         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2254         if (rec->found_dir_item) {
2255                 if (rec->found_file_extent)
2256                         fprintf(stderr, "root %llu inode %llu has both a dir "
2257                                 "item and extents, unsure if it is a dir or a "
2258                                 "regular file so setting it as a directory\n",
2259                                 (unsigned long long)root->objectid,
2260                                 (unsigned long long)rec->ino);
2261                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2262                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2263         } else if (!rec->found_dir_item) {
2264                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2265                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2266         }
2267         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2268         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2269         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2270         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2271         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2272         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2273         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2274         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2275
2276         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2277         BUG_ON(ret);
2278         btrfs_commit_transaction(trans, root);
2279         return 0;
2280 }
2281
2282 static int repair_inode_backrefs(struct btrfs_root *root,
2283                                  struct inode_record *rec,
2284                                  struct cache_tree *inode_cache,
2285                                  int delete)
2286 {
2287         struct inode_backref *tmp, *backref;
2288         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2289         int ret = 0;
2290         int repaired = 0;
2291
2292         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2293                 if (!delete && rec->ino == root_dirid) {
2294                         if (!rec->found_inode_item) {
2295                                 ret = create_inode_item(root, rec, backref, 1);
2296                                 if (ret)
2297                                         break;
2298                                 repaired++;
2299                         }
2300                 }
2301
2302                 /* Index 0 for root dir's are special, don't mess with it */
2303                 if (rec->ino == root_dirid && backref->index == 0)
2304                         continue;
2305
2306                 if (delete &&
2307                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2308                      (backref->found_dir_index && backref->found_inode_ref &&
2309                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2310                         ret = delete_dir_index(root, inode_cache, rec, backref);
2311                         if (ret)
2312                                 break;
2313                         repaired++;
2314                         list_del(&backref->list);
2315                         free(backref);
2316                 }
2317
2318                 if (!delete && !backref->found_dir_index &&
2319                     backref->found_dir_item && backref->found_inode_ref) {
2320                         ret = add_missing_dir_index(root, inode_cache, rec,
2321                                                     backref);
2322                         if (ret)
2323                                 break;
2324                         repaired++;
2325                         if (backref->found_dir_item &&
2326                             backref->found_dir_index &&
2327                             backref->found_dir_index) {
2328                                 if (!backref->errors &&
2329                                     backref->found_inode_ref) {
2330                                         list_del(&backref->list);
2331                                         free(backref);
2332                                 }
2333                         }
2334                 }
2335
2336                 if (!delete && (!backref->found_dir_index &&
2337                                 !backref->found_dir_item &&
2338                                 backref->found_inode_ref)) {
2339                         struct btrfs_trans_handle *trans;
2340                         struct btrfs_key location;
2341
2342                         ret = check_dir_conflict(root, backref->name,
2343                                                  backref->namelen,
2344                                                  backref->dir,
2345                                                  backref->index);
2346                         if (ret) {
2347                                 /*
2348                                  * let nlink fixing routine to handle it,
2349                                  * which can do it better.
2350                                  */
2351                                 ret = 0;
2352                                 break;
2353                         }
2354                         location.objectid = rec->ino;
2355                         location.type = BTRFS_INODE_ITEM_KEY;
2356                         location.offset = 0;
2357
2358                         trans = btrfs_start_transaction(root, 1);
2359                         if (IS_ERR(trans)) {
2360                                 ret = PTR_ERR(trans);
2361                                 break;
2362                         }
2363                         fprintf(stderr, "adding missing dir index/item pair "
2364                                 "for inode %llu\n",
2365                                 (unsigned long long)rec->ino);
2366                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2367                                                     backref->namelen,
2368                                                     backref->dir, &location,
2369                                                     imode_to_type(rec->imode),
2370                                                     backref->index);
2371                         BUG_ON(ret);
2372                         btrfs_commit_transaction(trans, root);
2373                         repaired++;
2374                 }
2375
2376                 if (!delete && (backref->found_inode_ref &&
2377                                 backref->found_dir_index &&
2378                                 backref->found_dir_item &&
2379                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2380                                 !rec->found_inode_item)) {
2381                         ret = create_inode_item(root, rec, backref, 0);
2382                         if (ret)
2383                                 break;
2384                         repaired++;
2385                 }
2386
2387         }
2388         return ret ? ret : repaired;
2389 }
2390
2391 /*
2392  * To determine the file type for nlink/inode_item repair
2393  *
2394  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2395  * Return -ENOENT if file type is not found.
2396  */
2397 static int find_file_type(struct inode_record *rec, u8 *type)
2398 {
2399         struct inode_backref *backref;
2400
2401         /* For inode item recovered case */
2402         if (rec->found_inode_item) {
2403                 *type = imode_to_type(rec->imode);
2404                 return 0;
2405         }
2406
2407         list_for_each_entry(backref, &rec->backrefs, list) {
2408                 if (backref->found_dir_index || backref->found_dir_item) {
2409                         *type = backref->filetype;
2410                         return 0;
2411                 }
2412         }
2413         return -ENOENT;
2414 }
2415
2416 /*
2417  * To determine the file name for nlink repair
2418  *
2419  * Return 0 if file name is found, set name and namelen.
2420  * Return -ENOENT if file name is not found.
2421  */
2422 static int find_file_name(struct inode_record *rec,
2423                           char *name, int *namelen)
2424 {
2425         struct inode_backref *backref;
2426
2427         list_for_each_entry(backref, &rec->backrefs, list) {
2428                 if (backref->found_dir_index || backref->found_dir_item ||
2429                     backref->found_inode_ref) {
2430                         memcpy(name, backref->name, backref->namelen);
2431                         *namelen = backref->namelen;
2432                         return 0;
2433                 }
2434         }
2435         return -ENOENT;
2436 }
2437
2438 /* Reset the nlink of the inode to the correct one */
2439 static int reset_nlink(struct btrfs_trans_handle *trans,
2440                        struct btrfs_root *root,
2441                        struct btrfs_path *path,
2442                        struct inode_record *rec)
2443 {
2444         struct inode_backref *backref;
2445         struct inode_backref *tmp;
2446         struct btrfs_key key;
2447         struct btrfs_inode_item *inode_item;
2448         int ret = 0;
2449
2450         /* We don't believe this either, reset it and iterate backref */
2451         rec->found_link = 0;
2452
2453         /* Remove all backref including the valid ones */
2454         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2455                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2456                                    backref->index, backref->name,
2457                                    backref->namelen, 0);
2458                 if (ret < 0)
2459                         goto out;
2460
2461                 /* remove invalid backref, so it won't be added back */
2462                 if (!(backref->found_dir_index &&
2463                       backref->found_dir_item &&
2464                       backref->found_inode_ref)) {
2465                         list_del(&backref->list);
2466                         free(backref);
2467                 } else {
2468                         rec->found_link++;
2469                 }
2470         }
2471
2472         /* Set nlink to 0 */
2473         key.objectid = rec->ino;
2474         key.type = BTRFS_INODE_ITEM_KEY;
2475         key.offset = 0;
2476         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2477         if (ret < 0)
2478                 goto out;
2479         if (ret > 0) {
2480                 ret = -ENOENT;
2481                 goto out;
2482         }
2483         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2484                                     struct btrfs_inode_item);
2485         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2486         btrfs_mark_buffer_dirty(path->nodes[0]);
2487         btrfs_release_path(path);
2488
2489         /*
2490          * Add back valid inode_ref/dir_item/dir_index,
2491          * add_link() will handle the nlink inc, so new nlink must be correct
2492          */
2493         list_for_each_entry(backref, &rec->backrefs, list) {
2494                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2495                                      backref->name, backref->namelen,
2496                                      backref->filetype, &backref->index, 1);
2497                 if (ret < 0)
2498                         goto out;
2499         }
2500 out:
2501         btrfs_release_path(path);
2502         return ret;
2503 }
2504
2505 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2506                                struct btrfs_root *root,
2507                                struct btrfs_path *path,
2508                                struct inode_record *rec)
2509 {
2510         char *dir_name = "lost+found";
2511         char namebuf[BTRFS_NAME_LEN] = {0};
2512         u64 lost_found_ino;
2513         u32 mode = 0700;
2514         u8 type = 0;
2515         int namelen = 0;
2516         int name_recovered = 0;
2517         int type_recovered = 0;
2518         int ret = 0;
2519
2520         /*
2521          * Get file name and type first before these invalid inode ref
2522          * are deleted by remove_all_invalid_backref()
2523          */
2524         name_recovered = !find_file_name(rec, namebuf, &namelen);
2525         type_recovered = !find_file_type(rec, &type);
2526
2527         if (!name_recovered) {
2528                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2529                        rec->ino, rec->ino);
2530                 namelen = count_digits(rec->ino);
2531                 sprintf(namebuf, "%llu", rec->ino);
2532                 name_recovered = 1;
2533         }
2534         if (!type_recovered) {
2535                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2536                        rec->ino);
2537                 type = BTRFS_FT_REG_FILE;
2538                 type_recovered = 1;
2539         }
2540
2541         ret = reset_nlink(trans, root, path, rec);
2542         if (ret < 0) {
2543                 fprintf(stderr,
2544                         "Failed to reset nlink for inode %llu: %s\n",
2545                         rec->ino, strerror(-ret));
2546                 goto out;
2547         }
2548
2549         if (rec->found_link == 0) {
2550                 lost_found_ino = root->highest_inode;
2551                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2552                         ret = -EOVERFLOW;
2553                         goto out;
2554                 }
2555                 lost_found_ino++;
2556                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2557                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2558                                   mode);
2559                 if (ret < 0) {
2560                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2561                                 dir_name, strerror(-ret));
2562                         goto out;
2563                 }
2564                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2565                                      namebuf, namelen, type, NULL, 1);
2566                 /*
2567                  * Add ".INO" suffix several times to handle case where
2568                  * "FILENAME.INO" is already taken by another file.
2569                  */
2570                 while (ret == -EEXIST) {
2571                         /*
2572                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2573                          */
2574                         if (namelen + count_digits(rec->ino) + 1 >
2575                             BTRFS_NAME_LEN) {
2576                                 ret = -EFBIG;
2577                                 goto out;
2578                         }
2579                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2580                                  ".%llu", rec->ino);
2581                         namelen += count_digits(rec->ino) + 1;
2582                         ret = btrfs_add_link(trans, root, rec->ino,
2583                                              lost_found_ino, namebuf,
2584                                              namelen, type, NULL, 1);
2585                 }
2586                 if (ret < 0) {
2587                         fprintf(stderr,
2588                                 "Failed to link the inode %llu to %s dir: %s\n",
2589                                 rec->ino, dir_name, strerror(-ret));
2590                         goto out;
2591                 }
2592                 /*
2593                  * Just increase the found_link, don't actually add the
2594                  * backref. This will make things easier and this inode
2595                  * record will be freed after the repair is done.
2596                  * So fsck will not report problem about this inode.
2597                  */
2598                 rec->found_link++;
2599                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2600                        namelen, namebuf, dir_name);
2601         }
2602         printf("Fixed the nlink of inode %llu\n", rec->ino);
2603 out:
2604         /*
2605          * Clear the flag anyway, or we will loop forever for the same inode
2606          * as it will not be removed from the bad inode list and the dead loop
2607          * happens.
2608          */
2609         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2610         btrfs_release_path(path);
2611         return ret;
2612 }
2613
2614 /*
2615  * Check if there is any normal(reg or prealloc) file extent for given
2616  * ino.
2617  * This is used to determine the file type when neither its dir_index/item or
2618  * inode_item exists.
2619  *
2620  * This will *NOT* report error, if any error happens, just consider it does
2621  * not have any normal file extent.
2622  */
2623 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2624 {
2625         struct btrfs_path *path;
2626         struct btrfs_key key;
2627         struct btrfs_key found_key;
2628         struct btrfs_file_extent_item *fi;
2629         u8 type;
2630         int ret = 0;
2631
2632         path = btrfs_alloc_path();
2633         if (!path)
2634                 goto out;
2635         key.objectid = ino;
2636         key.type = BTRFS_EXTENT_DATA_KEY;
2637         key.offset = 0;
2638
2639         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2640         if (ret < 0) {
2641                 ret = 0;
2642                 goto out;
2643         }
2644         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2645                 ret = btrfs_next_leaf(root, path);
2646                 if (ret) {
2647                         ret = 0;
2648                         goto out;
2649                 }
2650         }
2651         while (1) {
2652                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2653                                       path->slots[0]);
2654                 if (found_key.objectid != ino ||
2655                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2656                         break;
2657                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2658                                     struct btrfs_file_extent_item);
2659                 type = btrfs_file_extent_type(path->nodes[0], fi);
2660                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2661                         ret = 1;
2662                         goto out;
2663                 }
2664         }
2665 out:
2666         btrfs_free_path(path);
2667         return ret;
2668 }
2669
2670 static u32 btrfs_type_to_imode(u8 type)
2671 {
2672         static u32 imode_by_btrfs_type[] = {
2673                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2674                 [BTRFS_FT_DIR]          = S_IFDIR,
2675                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2676                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2677                 [BTRFS_FT_FIFO]         = S_IFIFO,
2678                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2679                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2680         };
2681
2682         return imode_by_btrfs_type[(type)];
2683 }
2684
2685 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2686                                 struct btrfs_root *root,
2687                                 struct btrfs_path *path,
2688                                 struct inode_record *rec)
2689 {
2690         u8 filetype;
2691         u32 mode = 0700;
2692         int type_recovered = 0;
2693         int ret = 0;
2694
2695         printf("Trying to rebuild inode:%llu\n", rec->ino);
2696
2697         type_recovered = !find_file_type(rec, &filetype);
2698
2699         /*
2700          * Try to determine inode type if type not found.
2701          *
2702          * For found regular file extent, it must be FILE.
2703          * For found dir_item/index, it must be DIR.
2704          *
2705          * For undetermined one, use FILE as fallback.
2706          *
2707          * TODO:
2708          * 1. If found backref(inode_index/item is already handled) to it,
2709          *    it must be DIR.
2710          *    Need new inode-inode ref structure to allow search for that.
2711          */
2712         if (!type_recovered) {
2713                 if (rec->found_file_extent &&
2714                     find_normal_file_extent(root, rec->ino)) {
2715                         type_recovered = 1;
2716                         filetype = BTRFS_FT_REG_FILE;
2717                 } else if (rec->found_dir_item) {
2718                         type_recovered = 1;
2719                         filetype = BTRFS_FT_DIR;
2720                 } else if (!list_empty(&rec->orphan_extents)) {
2721                         type_recovered = 1;
2722                         filetype = BTRFS_FT_REG_FILE;
2723                 } else{
2724                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2725                                rec->ino);
2726                         type_recovered = 1;
2727                         filetype = BTRFS_FT_REG_FILE;
2728                 }
2729         }
2730
2731         ret = btrfs_new_inode(trans, root, rec->ino,
2732                               mode | btrfs_type_to_imode(filetype));
2733         if (ret < 0)
2734                 goto out;
2735
2736         /*
2737          * Here inode rebuild is done, we only rebuild the inode item,
2738          * don't repair the nlink(like move to lost+found).
2739          * That is the job of nlink repair.
2740          *
2741          * We just fill the record and return
2742          */
2743         rec->found_dir_item = 1;
2744         rec->imode = mode | btrfs_type_to_imode(filetype);
2745         rec->nlink = 0;
2746         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2747         /* Ensure the inode_nlinks repair function will be called */
2748         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2749 out:
2750         return ret;
2751 }
2752
2753 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2754                                       struct btrfs_root *root,
2755                                       struct btrfs_path *path,
2756                                       struct inode_record *rec)
2757 {
2758         struct orphan_data_extent *orphan;
2759         struct orphan_data_extent *tmp;
2760         int ret = 0;
2761
2762         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2763                 /*
2764                  * Check for conflicting file extents
2765                  *
2766                  * Here we don't know whether the extents is compressed or not,
2767                  * so we can only assume it not compressed nor data offset,
2768                  * and use its disk_len as extent length.
2769                  */
2770                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2771                                        orphan->offset, orphan->disk_len, 0);
2772                 btrfs_release_path(path);
2773                 if (ret < 0)
2774                         goto out;
2775                 if (!ret) {
2776                         fprintf(stderr,
2777                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2778                                 orphan->disk_bytenr, orphan->disk_len);
2779                         ret = btrfs_free_extent(trans,
2780                                         root->fs_info->extent_root,
2781                                         orphan->disk_bytenr, orphan->disk_len,
2782                                         0, root->objectid, orphan->objectid,
2783                                         orphan->offset);
2784                         if (ret < 0)
2785                                 goto out;
2786                 }
2787                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2788                                 orphan->offset, orphan->disk_bytenr,
2789                                 orphan->disk_len, orphan->disk_len);
2790                 if (ret < 0)
2791                         goto out;
2792
2793                 /* Update file size info */
2794                 rec->found_size += orphan->disk_len;
2795                 if (rec->found_size == rec->nbytes)
2796                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2797
2798                 /* Update the file extent hole info too */
2799                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2800                                            orphan->disk_len);
2801                 if (ret < 0)
2802                         goto out;
2803                 if (RB_EMPTY_ROOT(&rec->holes))
2804                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2805
2806                 list_del(&orphan->list);
2807                 free(orphan);
2808         }
2809         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2810 out:
2811         return ret;
2812 }
2813
2814 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2815                                         struct btrfs_root *root,
2816                                         struct btrfs_path *path,
2817                                         struct inode_record *rec)
2818 {
2819         struct rb_node *node;
2820         struct file_extent_hole *hole;
2821         int found = 0;
2822         int ret = 0;
2823
2824         node = rb_first(&rec->holes);
2825
2826         while (node) {
2827                 found = 1;
2828                 hole = rb_entry(node, struct file_extent_hole, node);
2829                 ret = btrfs_punch_hole(trans, root, rec->ino,
2830                                        hole->start, hole->len);
2831                 if (ret < 0)
2832                         goto out;
2833                 ret = del_file_extent_hole(&rec->holes, hole->start,
2834                                            hole->len);
2835                 if (ret < 0)
2836                         goto out;
2837                 if (RB_EMPTY_ROOT(&rec->holes))
2838                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2839                 node = rb_first(&rec->holes);
2840         }
2841         /* special case for a file losing all its file extent */
2842         if (!found) {
2843                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2844                                        round_up(rec->isize, root->sectorsize));
2845                 if (ret < 0)
2846                         goto out;
2847         }
2848         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2849                rec->ino, root->objectid);
2850 out:
2851         return ret;
2852 }
2853
2854 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2855 {
2856         struct btrfs_trans_handle *trans;
2857         struct btrfs_path *path;
2858         int ret = 0;
2859
2860         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2861                              I_ERR_NO_ORPHAN_ITEM |
2862                              I_ERR_LINK_COUNT_WRONG |
2863                              I_ERR_NO_INODE_ITEM |
2864                              I_ERR_FILE_EXTENT_ORPHAN |
2865                              I_ERR_FILE_EXTENT_DISCOUNT|
2866                              I_ERR_FILE_NBYTES_WRONG)))
2867                 return rec->errors;
2868
2869         path = btrfs_alloc_path();
2870         if (!path)
2871                 return -ENOMEM;
2872
2873         /*
2874          * For nlink repair, it may create a dir and add link, so
2875          * 2 for parent(256)'s dir_index and dir_item
2876          * 2 for lost+found dir's inode_item and inode_ref
2877          * 1 for the new inode_ref of the file
2878          * 2 for lost+found dir's dir_index and dir_item for the file
2879          */
2880         trans = btrfs_start_transaction(root, 7);
2881         if (IS_ERR(trans)) {
2882                 btrfs_free_path(path);
2883                 return PTR_ERR(trans);
2884         }
2885
2886         if (rec->errors & I_ERR_NO_INODE_ITEM)
2887                 ret = repair_inode_no_item(trans, root, path, rec);
2888         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2889                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2890         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2891                 ret = repair_inode_discount_extent(trans, root, path, rec);
2892         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2893                 ret = repair_inode_isize(trans, root, path, rec);
2894         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2895                 ret = repair_inode_orphan_item(trans, root, path, rec);
2896         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2897                 ret = repair_inode_nlinks(trans, root, path, rec);
2898         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2899                 ret = repair_inode_nbytes(trans, root, path, rec);
2900         btrfs_commit_transaction(trans, root);
2901         btrfs_free_path(path);
2902         return ret;
2903 }
2904
2905 static int check_inode_recs(struct btrfs_root *root,
2906                             struct cache_tree *inode_cache)
2907 {
2908         struct cache_extent *cache;
2909         struct ptr_node *node;
2910         struct inode_record *rec;
2911         struct inode_backref *backref;
2912         int stage = 0;
2913         int ret = 0;
2914         int err = 0;
2915         u64 error = 0;
2916         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2917
2918         if (btrfs_root_refs(&root->root_item) == 0) {
2919                 if (!cache_tree_empty(inode_cache))
2920                         fprintf(stderr, "warning line %d\n", __LINE__);
2921                 return 0;
2922         }
2923
2924         /*
2925          * We need to record the highest inode number for later 'lost+found'
2926          * dir creation.
2927          * We must select an ino not used/referred by any existing inode, or
2928          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2929          * this may cause 'lost+found' dir has wrong nlinks.
2930          */
2931         cache = last_cache_extent(inode_cache);
2932         if (cache) {
2933                 node = container_of(cache, struct ptr_node, cache);
2934                 rec = node->data;
2935                 if (rec->ino > root->highest_inode)
2936                         root->highest_inode = rec->ino;
2937         }
2938
2939         /*
2940          * We need to repair backrefs first because we could change some of the
2941          * errors in the inode recs.
2942          *
2943          * We also need to go through and delete invalid backrefs first and then
2944          * add the correct ones second.  We do this because we may get EEXIST
2945          * when adding back the correct index because we hadn't yet deleted the
2946          * invalid index.
2947          *
2948          * For example, if we were missing a dir index then the directories
2949          * isize would be wrong, so if we fixed the isize to what we thought it
2950          * would be and then fixed the backref we'd still have a invalid fs, so
2951          * we need to add back the dir index and then check to see if the isize
2952          * is still wrong.
2953          */
2954         while (stage < 3) {
2955                 stage++;
2956                 if (stage == 3 && !err)
2957                         break;
2958
2959                 cache = search_cache_extent(inode_cache, 0);
2960                 while (repair && cache) {
2961                         node = container_of(cache, struct ptr_node, cache);
2962                         rec = node->data;
2963                         cache = next_cache_extent(cache);
2964
2965                         /* Need to free everything up and rescan */
2966                         if (stage == 3) {
2967                                 remove_cache_extent(inode_cache, &node->cache);
2968                                 free(node);
2969                                 free_inode_rec(rec);
2970                                 continue;
2971                         }
2972
2973                         if (list_empty(&rec->backrefs))
2974                                 continue;
2975
2976                         ret = repair_inode_backrefs(root, rec, inode_cache,
2977                                                     stage == 1);
2978                         if (ret < 0) {
2979                                 err = ret;
2980                                 stage = 2;
2981                                 break;
2982                         } if (ret > 0) {
2983                                 err = -EAGAIN;
2984                         }
2985                 }
2986         }
2987         if (err)
2988                 return err;
2989
2990         rec = get_inode_rec(inode_cache, root_dirid, 0);
2991         BUG_ON(IS_ERR(rec));
2992         if (rec) {
2993                 ret = check_root_dir(rec);
2994                 if (ret) {
2995                         fprintf(stderr, "root %llu root dir %llu error\n",
2996                                 (unsigned long long)root->root_key.objectid,
2997                                 (unsigned long long)root_dirid);
2998                         print_inode_error(root, rec);
2999                         error++;
3000                 }
3001         } else {
3002                 if (repair) {
3003                         struct btrfs_trans_handle *trans;
3004
3005                         trans = btrfs_start_transaction(root, 1);
3006                         if (IS_ERR(trans)) {
3007                                 err = PTR_ERR(trans);
3008                                 return err;
3009                         }
3010
3011                         fprintf(stderr,
3012                                 "root %llu missing its root dir, recreating\n",
3013                                 (unsigned long long)root->objectid);
3014
3015                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3016                         BUG_ON(ret);
3017
3018                         btrfs_commit_transaction(trans, root);
3019                         return -EAGAIN;
3020                 }
3021
3022                 fprintf(stderr, "root %llu root dir %llu not found\n",
3023                         (unsigned long long)root->root_key.objectid,
3024                         (unsigned long long)root_dirid);
3025         }
3026
3027         while (1) {
3028                 cache = search_cache_extent(inode_cache, 0);
3029                 if (!cache)
3030                         break;
3031                 node = container_of(cache, struct ptr_node, cache);
3032                 rec = node->data;
3033                 remove_cache_extent(inode_cache, &node->cache);
3034                 free(node);
3035                 if (rec->ino == root_dirid ||
3036                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3037                         free_inode_rec(rec);
3038                         continue;
3039                 }
3040
3041                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3042                         ret = check_orphan_item(root, rec->ino);
3043                         if (ret == 0)
3044                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3045                         if (can_free_inode_rec(rec)) {
3046                                 free_inode_rec(rec);
3047                                 continue;
3048                         }
3049                 }
3050
3051                 if (!rec->found_inode_item)
3052                         rec->errors |= I_ERR_NO_INODE_ITEM;
3053                 if (rec->found_link != rec->nlink)
3054                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3055                 if (repair) {
3056                         ret = try_repair_inode(root, rec);
3057                         if (ret == 0 && can_free_inode_rec(rec)) {
3058                                 free_inode_rec(rec);
3059                                 continue;
3060                         }
3061                         ret = 0;
3062                 }
3063
3064                 if (!(repair && ret == 0))
3065                         error++;
3066                 print_inode_error(root, rec);
3067                 list_for_each_entry(backref, &rec->backrefs, list) {
3068                         if (!backref->found_dir_item)
3069                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3070                         if (!backref->found_dir_index)
3071                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3072                         if (!backref->found_inode_ref)
3073                                 backref->errors |= REF_ERR_NO_INODE_REF;
3074                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3075                                 " namelen %u name %s filetype %d errors %x",
3076                                 (unsigned long long)backref->dir,
3077                                 (unsigned long long)backref->index,
3078                                 backref->namelen, backref->name,
3079                                 backref->filetype, backref->errors);
3080                         print_ref_error(backref->errors);
3081                 }
3082                 free_inode_rec(rec);
3083         }
3084         return (error > 0) ? -1 : 0;
3085 }
3086
3087 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3088                                         u64 objectid)
3089 {
3090         struct cache_extent *cache;
3091         struct root_record *rec = NULL;
3092         int ret;
3093
3094         cache = lookup_cache_extent(root_cache, objectid, 1);
3095         if (cache) {
3096                 rec = container_of(cache, struct root_record, cache);
3097         } else {
3098                 rec = calloc(1, sizeof(*rec));
3099                 if (!rec)
3100                         return ERR_PTR(-ENOMEM);
3101                 rec->objectid = objectid;
3102                 INIT_LIST_HEAD(&rec->backrefs);
3103                 rec->cache.start = objectid;
3104                 rec->cache.size = 1;
3105
3106                 ret = insert_cache_extent(root_cache, &rec->cache);
3107                 if (ret)
3108                         return ERR_PTR(-EEXIST);
3109         }
3110         return rec;
3111 }
3112
3113 static struct root_backref *get_root_backref(struct root_record *rec,
3114                                              u64 ref_root, u64 dir, u64 index,
3115                                              const char *name, int namelen)
3116 {
3117         struct root_backref *backref;
3118
3119         list_for_each_entry(backref, &rec->backrefs, list) {
3120                 if (backref->ref_root != ref_root || backref->dir != dir ||
3121                     backref->namelen != namelen)
3122                         continue;
3123                 if (memcmp(name, backref->name, namelen))
3124                         continue;
3125                 return backref;
3126         }
3127
3128         backref = calloc(1, sizeof(*backref) + namelen + 1);
3129         if (!backref)
3130                 return NULL;
3131         backref->ref_root = ref_root;
3132         backref->dir = dir;
3133         backref->index = index;
3134         backref->namelen = namelen;
3135         memcpy(backref->name, name, namelen);
3136         backref->name[namelen] = '\0';
3137         list_add_tail(&backref->list, &rec->backrefs);
3138         return backref;
3139 }
3140
3141 static void free_root_record(struct cache_extent *cache)
3142 {
3143         struct root_record *rec;
3144         struct root_backref *backref;
3145
3146         rec = container_of(cache, struct root_record, cache);
3147         while (!list_empty(&rec->backrefs)) {
3148                 backref = to_root_backref(rec->backrefs.next);
3149                 list_del(&backref->list);
3150                 free(backref);
3151         }
3152
3153         kfree(rec);
3154 }
3155
3156 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3157
3158 static int add_root_backref(struct cache_tree *root_cache,
3159                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3160                             const char *name, int namelen,
3161                             int item_type, int errors)
3162 {
3163         struct root_record *rec;
3164         struct root_backref *backref;
3165
3166         rec = get_root_rec(root_cache, root_id);
3167         BUG_ON(IS_ERR(rec));
3168         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3169         BUG_ON(!backref);
3170
3171         backref->errors |= errors;
3172
3173         if (item_type != BTRFS_DIR_ITEM_KEY) {
3174                 if (backref->found_dir_index || backref->found_back_ref ||
3175                     backref->found_forward_ref) {
3176                         if (backref->index != index)
3177                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3178                 } else {
3179                         backref->index = index;
3180                 }
3181         }
3182
3183         if (item_type == BTRFS_DIR_ITEM_KEY) {
3184                 if (backref->found_forward_ref)
3185                         rec->found_ref++;
3186                 backref->found_dir_item = 1;
3187         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3188                 backref->found_dir_index = 1;
3189         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3190                 if (backref->found_forward_ref)
3191                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3192                 else if (backref->found_dir_item)
3193                         rec->found_ref++;
3194                 backref->found_forward_ref = 1;
3195         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3196                 if (backref->found_back_ref)
3197                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3198                 backref->found_back_ref = 1;
3199         } else {
3200                 BUG_ON(1);
3201         }
3202
3203         if (backref->found_forward_ref && backref->found_dir_item)
3204                 backref->reachable = 1;
3205         return 0;
3206 }
3207
3208 static int merge_root_recs(struct btrfs_root *root,
3209                            struct cache_tree *src_cache,
3210                            struct cache_tree *dst_cache)
3211 {
3212         struct cache_extent *cache;
3213         struct ptr_node *node;
3214         struct inode_record *rec;
3215         struct inode_backref *backref;
3216         int ret = 0;
3217
3218         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3219                 free_inode_recs_tree(src_cache);
3220                 return 0;
3221         }
3222
3223         while (1) {
3224                 cache = search_cache_extent(src_cache, 0);
3225                 if (!cache)
3226                         break;
3227                 node = container_of(cache, struct ptr_node, cache);
3228                 rec = node->data;
3229                 remove_cache_extent(src_cache, &node->cache);
3230                 free(node);
3231
3232                 ret = is_child_root(root, root->objectid, rec->ino);
3233                 if (ret < 0)
3234                         break;
3235                 else if (ret == 0)
3236                         goto skip;
3237
3238                 list_for_each_entry(backref, &rec->backrefs, list) {
3239                         BUG_ON(backref->found_inode_ref);
3240                         if (backref->found_dir_item)
3241                                 add_root_backref(dst_cache, rec->ino,
3242                                         root->root_key.objectid, backref->dir,
3243                                         backref->index, backref->name,
3244                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3245                                         backref->errors);
3246                         if (backref->found_dir_index)
3247                                 add_root_backref(dst_cache, rec->ino,
3248                                         root->root_key.objectid, backref->dir,
3249                                         backref->index, backref->name,
3250                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3251                                         backref->errors);
3252                 }
3253 skip:
3254                 free_inode_rec(rec);
3255         }
3256         if (ret < 0)
3257                 return ret;
3258         return 0;
3259 }
3260
3261 static int check_root_refs(struct btrfs_root *root,
3262                            struct cache_tree *root_cache)
3263 {
3264         struct root_record *rec;
3265         struct root_record *ref_root;
3266         struct root_backref *backref;
3267         struct cache_extent *cache;
3268         int loop = 1;
3269         int ret;
3270         int error;
3271         int errors = 0;
3272
3273         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3274         BUG_ON(IS_ERR(rec));
3275         rec->found_ref = 1;
3276
3277         /* fixme: this can not detect circular references */
3278         while (loop) {
3279                 loop = 0;
3280                 cache = search_cache_extent(root_cache, 0);
3281                 while (1) {
3282                         if (!cache)
3283                                 break;
3284                         rec = container_of(cache, struct root_record, cache);
3285                         cache = next_cache_extent(cache);
3286
3287                         if (rec->found_ref == 0)
3288                                 continue;
3289
3290                         list_for_each_entry(backref, &rec->backrefs, list) {
3291                                 if (!backref->reachable)
3292                                         continue;
3293
3294                                 ref_root = get_root_rec(root_cache,
3295                                                         backref->ref_root);
3296                                 BUG_ON(IS_ERR(ref_root));
3297                                 if (ref_root->found_ref > 0)
3298                                         continue;
3299
3300                                 backref->reachable = 0;
3301                                 rec->found_ref--;
3302                                 if (rec->found_ref == 0)
3303                                         loop = 1;
3304                         }
3305                 }
3306         }
3307
3308         cache = search_cache_extent(root_cache, 0);
3309         while (1) {
3310                 if (!cache)
3311                         break;
3312                 rec = container_of(cache, struct root_record, cache);
3313                 cache = next_cache_extent(cache);
3314
3315                 if (rec->found_ref == 0 &&
3316                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3317                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3318                         ret = check_orphan_item(root->fs_info->tree_root,
3319                                                 rec->objectid);
3320                         if (ret == 0)
3321                                 continue;
3322
3323                         /*
3324                          * If we don't have a root item then we likely just have
3325                          * a dir item in a snapshot for this root but no actual
3326                          * ref key or anything so it's meaningless.
3327                          */
3328                         if (!rec->found_root_item)
3329                                 continue;
3330                         errors++;
3331                         fprintf(stderr, "fs tree %llu not referenced\n",
3332                                 (unsigned long long)rec->objectid);
3333                 }
3334
3335                 error = 0;
3336                 if (rec->found_ref > 0 && !rec->found_root_item)
3337                         error = 1;
3338                 list_for_each_entry(backref, &rec->backrefs, list) {
3339                         if (!backref->found_dir_item)
3340                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3341                         if (!backref->found_dir_index)
3342                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3343                         if (!backref->found_back_ref)
3344                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3345                         if (!backref->found_forward_ref)
3346                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3347                         if (backref->reachable && backref->errors)
3348                                 error = 1;
3349                 }
3350                 if (!error)
3351                         continue;
3352
3353                 errors++;
3354                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3355                         (unsigned long long)rec->objectid, rec->found_ref,
3356                          rec->found_root_item ? "" : "not found");
3357
3358                 list_for_each_entry(backref, &rec->backrefs, list) {
3359                         if (!backref->reachable)
3360                                 continue;
3361                         if (!backref->errors && rec->found_root_item)
3362                                 continue;
3363                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3364                                 " index %llu namelen %u name %s errors %x\n",
3365                                 (unsigned long long)backref->ref_root,
3366                                 (unsigned long long)backref->dir,
3367                                 (unsigned long long)backref->index,
3368                                 backref->namelen, backref->name,
3369                                 backref->errors);
3370                         print_ref_error(backref->errors);
3371                 }
3372         }
3373         return errors > 0 ? 1 : 0;
3374 }
3375
3376 static int process_root_ref(struct extent_buffer *eb, int slot,
3377                             struct btrfs_key *key,
3378                             struct cache_tree *root_cache)
3379 {
3380         u64 dirid;
3381         u64 index;
3382         u32 len;
3383         u32 name_len;
3384         struct btrfs_root_ref *ref;
3385         char namebuf[BTRFS_NAME_LEN];
3386         int error;
3387
3388         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3389
3390         dirid = btrfs_root_ref_dirid(eb, ref);
3391         index = btrfs_root_ref_sequence(eb, ref);
3392         name_len = btrfs_root_ref_name_len(eb, ref);
3393
3394         if (name_len <= BTRFS_NAME_LEN) {
3395                 len = name_len;
3396                 error = 0;
3397         } else {
3398                 len = BTRFS_NAME_LEN;
3399                 error = REF_ERR_NAME_TOO_LONG;
3400         }
3401         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3402
3403         if (key->type == BTRFS_ROOT_REF_KEY) {
3404                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3405                                  index, namebuf, len, key->type, error);
3406         } else {
3407                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3408                                  index, namebuf, len, key->type, error);
3409         }
3410         return 0;
3411 }
3412
3413 static void free_corrupt_block(struct cache_extent *cache)
3414 {
3415         struct btrfs_corrupt_block *corrupt;
3416
3417         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3418         free(corrupt);
3419 }
3420
3421 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3422
3423 /*
3424  * Repair the btree of the given root.
3425  *
3426  * The fix is to remove the node key in corrupt_blocks cache_tree.
3427  * and rebalance the tree.
3428  * After the fix, the btree should be writeable.
3429  */
3430 static int repair_btree(struct btrfs_root *root,
3431                         struct cache_tree *corrupt_blocks)
3432 {
3433         struct btrfs_trans_handle *trans;
3434         struct btrfs_path *path;
3435         struct btrfs_corrupt_block *corrupt;
3436         struct cache_extent *cache;
3437         struct btrfs_key key;
3438         u64 offset;
3439         int level;
3440         int ret = 0;
3441
3442         if (cache_tree_empty(corrupt_blocks))
3443                 return 0;
3444
3445         path = btrfs_alloc_path();
3446         if (!path)
3447                 return -ENOMEM;
3448
3449         trans = btrfs_start_transaction(root, 1);
3450         if (IS_ERR(trans)) {
3451                 ret = PTR_ERR(trans);
3452                 fprintf(stderr, "Error starting transaction: %s\n",
3453                         strerror(-ret));
3454                 goto out_free_path;
3455         }
3456         cache = first_cache_extent(corrupt_blocks);
3457         while (cache) {
3458                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3459                                        cache);
3460                 level = corrupt->level;
3461                 path->lowest_level = level;
3462                 key.objectid = corrupt->key.objectid;
3463                 key.type = corrupt->key.type;
3464                 key.offset = corrupt->key.offset;
3465
3466                 /*
3467                  * Here we don't want to do any tree balance, since it may
3468                  * cause a balance with corrupted brother leaf/node,
3469                  * so ins_len set to 0 here.
3470                  * Balance will be done after all corrupt node/leaf is deleted.
3471                  */
3472                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3473                 if (ret < 0)
3474                         goto out;
3475                 offset = btrfs_node_blockptr(path->nodes[level],
3476                                              path->slots[level]);
3477
3478                 /* Remove the ptr */
3479                 ret = btrfs_del_ptr(trans, root, path, level,
3480                                     path->slots[level]);
3481                 if (ret < 0)
3482                         goto out;
3483                 /*
3484                  * Remove the corresponding extent
3485                  * return value is not concerned.
3486                  */
3487                 btrfs_release_path(path);
3488                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3489                                         0, root->root_key.objectid,
3490                                         level - 1, 0);
3491                 cache = next_cache_extent(cache);
3492         }
3493
3494         /* Balance the btree using btrfs_search_slot() */
3495         cache = first_cache_extent(corrupt_blocks);
3496         while (cache) {
3497                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3498                                        cache);
3499                 memcpy(&key, &corrupt->key, sizeof(key));
3500                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3501                 if (ret < 0)
3502                         goto out;
3503                 /* return will always >0 since it won't find the item */
3504                 ret = 0;
3505                 btrfs_release_path(path);
3506                 cache = next_cache_extent(cache);
3507         }
3508 out:
3509         btrfs_commit_transaction(trans, root);
3510 out_free_path:
3511         btrfs_free_path(path);
3512         return ret;
3513 }
3514
3515 static int check_fs_root(struct btrfs_root *root,
3516                          struct cache_tree *root_cache,
3517                          struct walk_control *wc)
3518 {
3519         int ret = 0;
3520         int err = 0;
3521         int wret;
3522         int level;
3523         struct btrfs_path path;
3524         struct shared_node root_node;
3525         struct root_record *rec;
3526         struct btrfs_root_item *root_item = &root->root_item;
3527         struct cache_tree corrupt_blocks;
3528         struct orphan_data_extent *orphan;
3529         struct orphan_data_extent *tmp;
3530         enum btrfs_tree_block_status status;
3531
3532         /*
3533          * Reuse the corrupt_block cache tree to record corrupted tree block
3534          *
3535          * Unlike the usage in extent tree check, here we do it in a per
3536          * fs/subvol tree base.
3537          */
3538         cache_tree_init(&corrupt_blocks);
3539         root->fs_info->corrupt_blocks = &corrupt_blocks;
3540
3541         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3542                 rec = get_root_rec(root_cache, root->root_key.objectid);
3543                 BUG_ON(IS_ERR(rec));
3544                 if (btrfs_root_refs(root_item) > 0)
3545                         rec->found_root_item = 1;
3546         }
3547
3548         btrfs_init_path(&path);
3549         memset(&root_node, 0, sizeof(root_node));
3550         cache_tree_init(&root_node.root_cache);
3551         cache_tree_init(&root_node.inode_cache);
3552
3553         /* Move the orphan extent record to corresponding inode_record */
3554         list_for_each_entry_safe(orphan, tmp,
3555                                  &root->orphan_data_extents, list) {
3556                 struct inode_record *inode;
3557
3558                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3559                                       1);
3560                 BUG_ON(IS_ERR(inode));
3561                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3562                 list_move(&orphan->list, &inode->orphan_extents);
3563         }
3564
3565         level = btrfs_header_level(root->node);
3566         memset(wc->nodes, 0, sizeof(wc->nodes));
3567         wc->nodes[level] = &root_node;
3568         wc->active_node = level;
3569         wc->root_level = level;
3570
3571         /* We may not have checked the root block, lets do that now */
3572         if (btrfs_is_leaf(root->node))
3573                 status = btrfs_check_leaf(root, NULL, root->node);
3574         else
3575                 status = btrfs_check_node(root, NULL, root->node);
3576         if (status != BTRFS_TREE_BLOCK_CLEAN)
3577                 return -EIO;
3578
3579         if (btrfs_root_refs(root_item) > 0 ||
3580             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3581                 path.nodes[level] = root->node;
3582                 extent_buffer_get(root->node);
3583                 path.slots[level] = 0;
3584         } else {
3585                 struct btrfs_key key;
3586                 struct btrfs_disk_key found_key;
3587
3588                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3589                 level = root_item->drop_level;
3590                 path.lowest_level = level;
3591                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3592                 if (wret < 0)
3593                         goto skip_walking;
3594                 btrfs_node_key(path.nodes[level], &found_key,
3595                                 path.slots[level]);
3596                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3597                                         sizeof(found_key)));
3598         }
3599
3600         while (1) {
3601                 wret = walk_down_tree(root, &path, wc, &level);
3602                 if (wret < 0)
3603                         ret = wret;
3604                 if (wret != 0)
3605                         break;
3606
3607                 wret = walk_up_tree(root, &path, wc, &level);
3608                 if (wret < 0)
3609                         ret = wret;
3610                 if (wret != 0)
3611                         break;
3612         }
3613 skip_walking:
3614         btrfs_release_path(&path);
3615
3616         if (!cache_tree_empty(&corrupt_blocks)) {
3617                 struct cache_extent *cache;
3618                 struct btrfs_corrupt_block *corrupt;
3619
3620                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3621                        root->root_key.objectid);
3622                 cache = first_cache_extent(&corrupt_blocks);
3623                 while (cache) {
3624                         corrupt = container_of(cache,
3625                                                struct btrfs_corrupt_block,
3626                                                cache);
3627                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3628                                cache->start, corrupt->level,
3629                                corrupt->key.objectid, corrupt->key.type,
3630                                corrupt->key.offset);
3631                         cache = next_cache_extent(cache);
3632                 }
3633                 if (repair) {
3634                         printf("Try to repair the btree for root %llu\n",
3635                                root->root_key.objectid);
3636                         ret = repair_btree(root, &corrupt_blocks);
3637                         if (ret < 0)
3638                                 fprintf(stderr, "Failed to repair btree: %s\n",
3639                                         strerror(-ret));
3640                         if (!ret)
3641                                 printf("Btree for root %llu is fixed\n",
3642                                        root->root_key.objectid);
3643                 }
3644         }
3645
3646         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3647         if (err < 0)
3648                 ret = err;
3649
3650         if (root_node.current) {
3651                 root_node.current->checked = 1;
3652                 maybe_free_inode_rec(&root_node.inode_cache,
3653                                 root_node.current);
3654         }
3655
3656         err = check_inode_recs(root, &root_node.inode_cache);
3657         if (!ret)
3658                 ret = err;
3659
3660         free_corrupt_blocks_tree(&corrupt_blocks);
3661         root->fs_info->corrupt_blocks = NULL;
3662         free_orphan_data_extents(&root->orphan_data_extents);
3663         return ret;
3664 }
3665
3666 static int fs_root_objectid(u64 objectid)
3667 {
3668         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3669             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3670                 return 1;
3671         return is_fstree(objectid);
3672 }
3673
3674 static int check_fs_roots(struct btrfs_root *root,
3675                           struct cache_tree *root_cache)
3676 {
3677         struct btrfs_path path;
3678         struct btrfs_key key;
3679         struct walk_control wc;
3680         struct extent_buffer *leaf, *tree_node;
3681         struct btrfs_root *tmp_root;
3682         struct btrfs_root *tree_root = root->fs_info->tree_root;
3683         int ret;
3684         int err = 0;
3685
3686         if (ctx.progress_enabled) {
3687                 ctx.tp = TASK_FS_ROOTS;
3688                 task_start(ctx.info);
3689         }
3690
3691         /*
3692          * Just in case we made any changes to the extent tree that weren't
3693          * reflected into the free space cache yet.
3694          */
3695         if (repair)
3696                 reset_cached_block_groups(root->fs_info);
3697         memset(&wc, 0, sizeof(wc));
3698         cache_tree_init(&wc.shared);
3699         btrfs_init_path(&path);
3700
3701 again:
3702         key.offset = 0;
3703         key.objectid = 0;
3704         key.type = BTRFS_ROOT_ITEM_KEY;
3705         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3706         if (ret < 0) {
3707                 err = 1;
3708                 goto out;
3709         }
3710         tree_node = tree_root->node;
3711         while (1) {
3712                 if (tree_node != tree_root->node) {
3713                         free_root_recs_tree(root_cache);
3714                         btrfs_release_path(&path);
3715                         goto again;
3716                 }
3717                 leaf = path.nodes[0];
3718                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3719                         ret = btrfs_next_leaf(tree_root, &path);
3720                         if (ret) {
3721                                 if (ret < 0)
3722                                         err = 1;
3723                                 break;
3724                         }
3725                         leaf = path.nodes[0];
3726                 }
3727                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3728                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3729                     fs_root_objectid(key.objectid)) {
3730                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3731                                 tmp_root = btrfs_read_fs_root_no_cache(
3732                                                 root->fs_info, &key);
3733                         } else {
3734                                 key.offset = (u64)-1;
3735                                 tmp_root = btrfs_read_fs_root(
3736                                                 root->fs_info, &key);
3737                         }
3738                         if (IS_ERR(tmp_root)) {
3739                                 err = 1;
3740                                 goto next;
3741                         }
3742                         ret = check_fs_root(tmp_root, root_cache, &wc);
3743                         if (ret == -EAGAIN) {
3744                                 free_root_recs_tree(root_cache);
3745                                 btrfs_release_path(&path);
3746                                 goto again;
3747                         }
3748                         if (ret)
3749                                 err = 1;
3750                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3751                                 btrfs_free_fs_root(tmp_root);
3752                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3753                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3754                         process_root_ref(leaf, path.slots[0], &key,
3755                                          root_cache);
3756                 }
3757 next:
3758                 path.slots[0]++;
3759         }
3760 out:
3761         btrfs_release_path(&path);
3762         if (err)
3763                 free_extent_cache_tree(&wc.shared);
3764         if (!cache_tree_empty(&wc.shared))
3765                 fprintf(stderr, "warning line %d\n", __LINE__);
3766
3767         task_stop(ctx.info);
3768
3769         return err;
3770 }
3771
3772 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3773 {
3774         struct list_head *cur = rec->backrefs.next;
3775         struct extent_backref *back;
3776         struct tree_backref *tback;
3777         struct data_backref *dback;
3778         u64 found = 0;
3779         int err = 0;
3780
3781         while(cur != &rec->backrefs) {
3782                 back = to_extent_backref(cur);
3783                 cur = cur->next;
3784                 if (!back->found_extent_tree) {
3785                         err = 1;
3786                         if (!print_errs)
3787                                 goto out;
3788                         if (back->is_data) {
3789                                 dback = to_data_backref(back);
3790                                 fprintf(stderr, "Backref %llu %s %llu"
3791                                         " owner %llu offset %llu num_refs %lu"
3792                                         " not found in extent tree\n",
3793                                         (unsigned long long)rec->start,
3794                                         back->full_backref ?
3795                                         "parent" : "root",
3796                                         back->full_backref ?
3797                                         (unsigned long long)dback->parent:
3798                                         (unsigned long long)dback->root,
3799                                         (unsigned long long)dback->owner,
3800                                         (unsigned long long)dback->offset,
3801                                         (unsigned long)dback->num_refs);
3802                         } else {
3803                                 tback = to_tree_backref(back);
3804                                 fprintf(stderr, "Backref %llu parent %llu"
3805                                         " root %llu not found in extent tree\n",
3806                                         (unsigned long long)rec->start,
3807                                         (unsigned long long)tback->parent,
3808                                         (unsigned long long)tback->root);
3809                         }
3810                 }
3811                 if (!back->is_data && !back->found_ref) {
3812                         err = 1;
3813                         if (!print_errs)
3814                                 goto out;
3815                         tback = to_tree_backref(back);
3816                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3817                                 (unsigned long long)rec->start,
3818                                 back->full_backref ? "parent" : "root",
3819                                 back->full_backref ?
3820                                 (unsigned long long)tback->parent :
3821                                 (unsigned long long)tback->root, back);
3822                 }
3823                 if (back->is_data) {
3824                         dback = to_data_backref(back);
3825                         if (dback->found_ref != dback->num_refs) {
3826                                 err = 1;
3827                                 if (!print_errs)
3828                                         goto out;
3829                                 fprintf(stderr, "Incorrect local backref count"
3830                                         " on %llu %s %llu owner %llu"
3831                                         " offset %llu found %u wanted %u back %p\n",
3832                                         (unsigned long long)rec->start,
3833                                         back->full_backref ?
3834                                         "parent" : "root",
3835                                         back->full_backref ?
3836                                         (unsigned long long)dback->parent:
3837                                         (unsigned long long)dback->root,
3838                                         (unsigned long long)dback->owner,
3839                                         (unsigned long long)dback->offset,
3840                                         dback->found_ref, dback->num_refs, back);
3841                         }
3842                         if (dback->disk_bytenr != rec->start) {
3843                                 err = 1;
3844                                 if (!print_errs)
3845                                         goto out;
3846                                 fprintf(stderr, "Backref disk bytenr does not"
3847                                         " match extent record, bytenr=%llu, "
3848                                         "ref bytenr=%llu\n",
3849                                         (unsigned long long)rec->start,
3850                                         (unsigned long long)dback->disk_bytenr);
3851                         }
3852
3853                         if (dback->bytes != rec->nr) {
3854                                 err = 1;
3855                                 if (!print_errs)
3856                                         goto out;
3857                                 fprintf(stderr, "Backref bytes do not match "
3858                                         "extent backref, bytenr=%llu, ref "
3859                                         "bytes=%llu, backref bytes=%llu\n",
3860                                         (unsigned long long)rec->start,
3861                                         (unsigned long long)rec->nr,
3862                                         (unsigned long long)dback->bytes);
3863                         }
3864                 }
3865                 if (!back->is_data) {
3866                         found += 1;
3867                 } else {
3868                         dback = to_data_backref(back);
3869                         found += dback->found_ref;
3870                 }
3871         }
3872         if (found != rec->refs) {
3873                 err = 1;
3874                 if (!print_errs)
3875                         goto out;
3876                 fprintf(stderr, "Incorrect global backref count "
3877                         "on %llu found %llu wanted %llu\n",
3878                         (unsigned long long)rec->start,
3879                         (unsigned long long)found,
3880                         (unsigned long long)rec->refs);
3881         }
3882 out:
3883         return err;
3884 }
3885
3886 static int free_all_extent_backrefs(struct extent_record *rec)
3887 {
3888         struct extent_backref *back;
3889         struct list_head *cur;
3890         while (!list_empty(&rec->backrefs)) {
3891                 cur = rec->backrefs.next;
3892                 back = to_extent_backref(cur);
3893                 list_del(cur);
3894                 free(back);
3895         }
3896         return 0;
3897 }
3898
3899 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3900                                      struct cache_tree *extent_cache)
3901 {
3902         struct cache_extent *cache;
3903         struct extent_record *rec;
3904
3905         while (1) {
3906                 cache = first_cache_extent(extent_cache);
3907                 if (!cache)
3908                         break;
3909                 rec = container_of(cache, struct extent_record, cache);
3910                 remove_cache_extent(extent_cache, cache);
3911                 free_all_extent_backrefs(rec);
3912                 free(rec);
3913         }
3914 }
3915
3916 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3917                                  struct extent_record *rec)
3918 {
3919         if (rec->content_checked && rec->owner_ref_checked &&
3920             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3921             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3922             !rec->bad_full_backref && !rec->crossing_stripes &&
3923             !rec->wrong_chunk_type) {
3924                 remove_cache_extent(extent_cache, &rec->cache);
3925                 free_all_extent_backrefs(rec);
3926                 list_del_init(&rec->list);
3927                 free(rec);
3928         }
3929         return 0;
3930 }
3931
3932 static int check_owner_ref(struct btrfs_root *root,
3933                             struct extent_record *rec,
3934                             struct extent_buffer *buf)
3935 {
3936         struct extent_backref *node;
3937         struct tree_backref *back;
3938         struct btrfs_root *ref_root;
3939         struct btrfs_key key;
3940         struct btrfs_path path;
3941         struct extent_buffer *parent;
3942         int level;
3943         int found = 0;
3944         int ret;
3945
3946         list_for_each_entry(node, &rec->backrefs, list) {
3947                 if (node->is_data)
3948                         continue;
3949                 if (!node->found_ref)
3950                         continue;
3951                 if (node->full_backref)
3952                         continue;
3953                 back = to_tree_backref(node);
3954                 if (btrfs_header_owner(buf) == back->root)
3955                         return 0;
3956         }
3957         BUG_ON(rec->is_root);
3958
3959         /* try to find the block by search corresponding fs tree */
3960         key.objectid = btrfs_header_owner(buf);
3961         key.type = BTRFS_ROOT_ITEM_KEY;
3962         key.offset = (u64)-1;
3963
3964         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3965         if (IS_ERR(ref_root))
3966                 return 1;
3967
3968         level = btrfs_header_level(buf);
3969         if (level == 0)
3970                 btrfs_item_key_to_cpu(buf, &key, 0);
3971         else
3972                 btrfs_node_key_to_cpu(buf, &key, 0);
3973
3974         btrfs_init_path(&path);
3975         path.lowest_level = level + 1;
3976         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3977         if (ret < 0)
3978                 return 0;
3979
3980         parent = path.nodes[level + 1];
3981         if (parent && buf->start == btrfs_node_blockptr(parent,
3982                                                         path.slots[level + 1]))
3983                 found = 1;
3984
3985         btrfs_release_path(&path);
3986         return found ? 0 : 1;
3987 }
3988
3989 static int is_extent_tree_record(struct extent_record *rec)
3990 {
3991         struct list_head *cur = rec->backrefs.next;
3992         struct extent_backref *node;
3993         struct tree_backref *back;
3994         int is_extent = 0;
3995
3996         while(cur != &rec->backrefs) {
3997                 node = to_extent_backref(cur);
3998                 cur = cur->next;
3999                 if (node->is_data)
4000                         return 0;
4001                 back = to_tree_backref(node);
4002                 if (node->full_backref)
4003                         return 0;
4004                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4005                         is_extent = 1;
4006         }
4007         return is_extent;
4008 }
4009
4010
4011 static int record_bad_block_io(struct btrfs_fs_info *info,
4012                                struct cache_tree *extent_cache,
4013                                u64 start, u64 len)
4014 {
4015         struct extent_record *rec;
4016         struct cache_extent *cache;
4017         struct btrfs_key key;
4018
4019         cache = lookup_cache_extent(extent_cache, start, len);
4020         if (!cache)
4021                 return 0;
4022
4023         rec = container_of(cache, struct extent_record, cache);
4024         if (!is_extent_tree_record(rec))
4025                 return 0;
4026
4027         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4028         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4029 }
4030
4031 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4032                        struct extent_buffer *buf, int slot)
4033 {
4034         if (btrfs_header_level(buf)) {
4035                 struct btrfs_key_ptr ptr1, ptr2;
4036
4037                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4038                                    sizeof(struct btrfs_key_ptr));
4039                 read_extent_buffer(buf, &ptr2,
4040                                    btrfs_node_key_ptr_offset(slot + 1),
4041                                    sizeof(struct btrfs_key_ptr));
4042                 write_extent_buffer(buf, &ptr1,
4043                                     btrfs_node_key_ptr_offset(slot + 1),
4044                                     sizeof(struct btrfs_key_ptr));
4045                 write_extent_buffer(buf, &ptr2,
4046                                     btrfs_node_key_ptr_offset(slot),
4047                                     sizeof(struct btrfs_key_ptr));
4048                 if (slot == 0) {
4049                         struct btrfs_disk_key key;
4050                         btrfs_node_key(buf, &key, 0);
4051                         btrfs_fixup_low_keys(root, path, &key,
4052                                              btrfs_header_level(buf) + 1);
4053                 }
4054         } else {
4055                 struct btrfs_item *item1, *item2;
4056                 struct btrfs_key k1, k2;
4057                 char *item1_data, *item2_data;
4058                 u32 item1_offset, item2_offset, item1_size, item2_size;
4059
4060                 item1 = btrfs_item_nr(slot);
4061                 item2 = btrfs_item_nr(slot + 1);
4062                 btrfs_item_key_to_cpu(buf, &k1, slot);
4063                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4064                 item1_offset = btrfs_item_offset(buf, item1);
4065                 item2_offset = btrfs_item_offset(buf, item2);
4066                 item1_size = btrfs_item_size(buf, item1);
4067                 item2_size = btrfs_item_size(buf, item2);
4068
4069                 item1_data = malloc(item1_size);
4070                 if (!item1_data)
4071                         return -ENOMEM;
4072                 item2_data = malloc(item2_size);
4073                 if (!item2_data) {
4074                         free(item1_data);
4075                         return -ENOMEM;
4076                 }
4077
4078                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4079                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4080
4081                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4082                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4083                 free(item1_data);
4084                 free(item2_data);
4085
4086                 btrfs_set_item_offset(buf, item1, item2_offset);
4087                 btrfs_set_item_offset(buf, item2, item1_offset);
4088                 btrfs_set_item_size(buf, item1, item2_size);
4089                 btrfs_set_item_size(buf, item2, item1_size);
4090
4091                 path->slots[0] = slot;
4092                 btrfs_set_item_key_unsafe(root, path, &k2);
4093                 path->slots[0] = slot + 1;
4094                 btrfs_set_item_key_unsafe(root, path, &k1);
4095         }
4096         return 0;
4097 }
4098
4099 static int fix_key_order(struct btrfs_trans_handle *trans,
4100                          struct btrfs_root *root,
4101                          struct btrfs_path *path)
4102 {
4103         struct extent_buffer *buf;
4104         struct btrfs_key k1, k2;
4105         int i;
4106         int level = path->lowest_level;
4107         int ret = -EIO;
4108
4109         buf = path->nodes[level];
4110         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4111                 if (level) {
4112                         btrfs_node_key_to_cpu(buf, &k1, i);
4113                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4114                 } else {
4115                         btrfs_item_key_to_cpu(buf, &k1, i);
4116                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4117                 }
4118                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4119                         continue;
4120                 ret = swap_values(root, path, buf, i);
4121                 if (ret)
4122                         break;
4123                 btrfs_mark_buffer_dirty(buf);
4124                 i = 0;
4125         }
4126         return ret;
4127 }
4128
4129 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4130                              struct btrfs_root *root,
4131                              struct btrfs_path *path,
4132                              struct extent_buffer *buf, int slot)
4133 {
4134         struct btrfs_key key;
4135         int nritems = btrfs_header_nritems(buf);
4136
4137         btrfs_item_key_to_cpu(buf, &key, slot);
4138
4139         /* These are all the keys we can deal with missing. */
4140         if (key.type != BTRFS_DIR_INDEX_KEY &&
4141             key.type != BTRFS_EXTENT_ITEM_KEY &&
4142             key.type != BTRFS_METADATA_ITEM_KEY &&
4143             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4144             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4145                 return -1;
4146
4147         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4148                (unsigned long long)key.objectid, key.type,
4149                (unsigned long long)key.offset, slot, buf->start);
4150         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4151                               btrfs_item_nr_offset(slot + 1),
4152                               sizeof(struct btrfs_item) *
4153                               (nritems - slot - 1));
4154         btrfs_set_header_nritems(buf, nritems - 1);
4155         if (slot == 0) {
4156                 struct btrfs_disk_key disk_key;
4157
4158                 btrfs_item_key(buf, &disk_key, 0);
4159                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4160         }
4161         btrfs_mark_buffer_dirty(buf);
4162         return 0;
4163 }
4164
4165 static int fix_item_offset(struct btrfs_trans_handle *trans,
4166                            struct btrfs_root *root,
4167                            struct btrfs_path *path)
4168 {
4169         struct extent_buffer *buf;
4170         int i;
4171         int ret = 0;
4172
4173         /* We should only get this for leaves */
4174         BUG_ON(path->lowest_level);
4175         buf = path->nodes[0];
4176 again:
4177         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4178                 unsigned int shift = 0, offset;
4179
4180                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4181                     BTRFS_LEAF_DATA_SIZE(root)) {
4182                         if (btrfs_item_end_nr(buf, i) >
4183                             BTRFS_LEAF_DATA_SIZE(root)) {
4184                                 ret = delete_bogus_item(trans, root, path,
4185                                                         buf, i);
4186                                 if (!ret)
4187                                         goto again;
4188                                 fprintf(stderr, "item is off the end of the "
4189                                         "leaf, can't fix\n");
4190                                 ret = -EIO;
4191                                 break;
4192                         }
4193                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4194                                 btrfs_item_end_nr(buf, i);
4195                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4196                            btrfs_item_offset_nr(buf, i - 1)) {
4197                         if (btrfs_item_end_nr(buf, i) >
4198                             btrfs_item_offset_nr(buf, i - 1)) {
4199                                 ret = delete_bogus_item(trans, root, path,
4200                                                         buf, i);
4201                                 if (!ret)
4202                                         goto again;
4203                                 fprintf(stderr, "items overlap, can't fix\n");
4204                                 ret = -EIO;
4205                                 break;
4206                         }
4207                         shift = btrfs_item_offset_nr(buf, i - 1) -
4208                                 btrfs_item_end_nr(buf, i);
4209                 }
4210                 if (!shift)
4211                         continue;
4212
4213                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4214                        i, shift, (unsigned long long)buf->start);
4215                 offset = btrfs_item_offset_nr(buf, i);
4216                 memmove_extent_buffer(buf,
4217                                       btrfs_leaf_data(buf) + offset + shift,
4218                                       btrfs_leaf_data(buf) + offset,
4219                                       btrfs_item_size_nr(buf, i));
4220                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4221                                       offset + shift);
4222                 btrfs_mark_buffer_dirty(buf);
4223         }
4224
4225         /*
4226          * We may have moved things, in which case we want to exit so we don't
4227          * write those changes out.  Once we have proper abort functionality in
4228          * progs this can be changed to something nicer.
4229          */
4230         BUG_ON(ret);
4231         return ret;
4232 }
4233
4234 /*
4235  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4236  * then just return -EIO.
4237  */
4238 static int try_to_fix_bad_block(struct btrfs_root *root,
4239                                 struct extent_buffer *buf,
4240                                 enum btrfs_tree_block_status status)
4241 {
4242         struct btrfs_trans_handle *trans;
4243         struct ulist *roots;
4244         struct ulist_node *node;
4245         struct btrfs_root *search_root;
4246         struct btrfs_path *path;
4247         struct ulist_iterator iter;
4248         struct btrfs_key root_key, key;
4249         int ret;
4250
4251         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4252             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4253                 return -EIO;
4254
4255         path = btrfs_alloc_path();
4256         if (!path)
4257                 return -EIO;
4258
4259         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4260                                    0, &roots);
4261         if (ret) {
4262                 btrfs_free_path(path);
4263                 return -EIO;
4264         }
4265
4266         ULIST_ITER_INIT(&iter);
4267         while ((node = ulist_next(roots, &iter))) {
4268                 root_key.objectid = node->val;
4269                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4270                 root_key.offset = (u64)-1;
4271
4272                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4273                 if (IS_ERR(root)) {
4274                         ret = -EIO;
4275                         break;
4276                 }
4277
4278
4279                 trans = btrfs_start_transaction(search_root, 0);
4280                 if (IS_ERR(trans)) {
4281                         ret = PTR_ERR(trans);
4282                         break;
4283                 }
4284
4285                 path->lowest_level = btrfs_header_level(buf);
4286                 path->skip_check_block = 1;
4287                 if (path->lowest_level)
4288                         btrfs_node_key_to_cpu(buf, &key, 0);
4289                 else
4290                         btrfs_item_key_to_cpu(buf, &key, 0);
4291                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4292                 if (ret) {
4293                         ret = -EIO;
4294                         btrfs_commit_transaction(trans, search_root);
4295                         break;
4296                 }
4297                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4298                         ret = fix_key_order(trans, search_root, path);
4299                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4300                         ret = fix_item_offset(trans, search_root, path);
4301                 if (ret) {
4302                         btrfs_commit_transaction(trans, search_root);
4303                         break;
4304                 }
4305                 btrfs_release_path(path);
4306                 btrfs_commit_transaction(trans, search_root);
4307         }
4308         ulist_free(roots);
4309         btrfs_free_path(path);
4310         return ret;
4311 }
4312
4313 static int check_block(struct btrfs_root *root,
4314                        struct cache_tree *extent_cache,
4315                        struct extent_buffer *buf, u64 flags)
4316 {
4317         struct extent_record *rec;
4318         struct cache_extent *cache;
4319         struct btrfs_key key;
4320         enum btrfs_tree_block_status status;
4321         int ret = 0;
4322         int level;
4323
4324         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4325         if (!cache)
4326                 return 1;
4327         rec = container_of(cache, struct extent_record, cache);
4328         rec->generation = btrfs_header_generation(buf);
4329
4330         level = btrfs_header_level(buf);
4331         if (btrfs_header_nritems(buf) > 0) {
4332
4333                 if (level == 0)
4334                         btrfs_item_key_to_cpu(buf, &key, 0);
4335                 else
4336                         btrfs_node_key_to_cpu(buf, &key, 0);
4337
4338                 rec->info_objectid = key.objectid;
4339         }
4340         rec->info_level = level;
4341
4342         if (btrfs_is_leaf(buf))
4343                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4344         else
4345                 status = btrfs_check_node(root, &rec->parent_key, buf);
4346
4347         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4348                 if (repair)
4349                         status = try_to_fix_bad_block(root, buf, status);
4350                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4351                         ret = -EIO;
4352                         fprintf(stderr, "bad block %llu\n",
4353                                 (unsigned long long)buf->start);
4354                 } else {
4355                         /*
4356                          * Signal to callers we need to start the scan over
4357                          * again since we'll have cowed blocks.
4358                          */
4359                         ret = -EAGAIN;
4360                 }
4361         } else {
4362                 rec->content_checked = 1;
4363                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4364                         rec->owner_ref_checked = 1;
4365                 else {
4366                         ret = check_owner_ref(root, rec, buf);
4367                         if (!ret)
4368                                 rec->owner_ref_checked = 1;
4369                 }
4370         }
4371         if (!ret)
4372                 maybe_free_extent_rec(extent_cache, rec);
4373         return ret;
4374 }
4375
4376 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4377                                                 u64 parent, u64 root)
4378 {
4379         struct list_head *cur = rec->backrefs.next;
4380         struct extent_backref *node;
4381         struct tree_backref *back;
4382
4383         while(cur != &rec->backrefs) {
4384                 node = to_extent_backref(cur);
4385                 cur = cur->next;
4386                 if (node->is_data)
4387                         continue;
4388                 back = to_tree_backref(node);
4389                 if (parent > 0) {
4390                         if (!node->full_backref)
4391                                 continue;
4392                         if (parent == back->parent)
4393                                 return back;
4394                 } else {
4395                         if (node->full_backref)
4396                                 continue;
4397                         if (back->root == root)
4398                                 return back;
4399                 }
4400         }
4401         return NULL;
4402 }
4403
4404 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4405                                                 u64 parent, u64 root)
4406 {
4407         struct tree_backref *ref = malloc(sizeof(*ref));
4408
4409         if (!ref)
4410                 return NULL;
4411         memset(&ref->node, 0, sizeof(ref->node));
4412         if (parent > 0) {
4413                 ref->parent = parent;
4414                 ref->node.full_backref = 1;
4415         } else {
4416                 ref->root = root;
4417                 ref->node.full_backref = 0;
4418         }
4419         list_add_tail(&ref->node.list, &rec->backrefs);
4420
4421         return ref;
4422 }
4423
4424 static struct data_backref *find_data_backref(struct extent_record *rec,
4425                                                 u64 parent, u64 root,
4426                                                 u64 owner, u64 offset,
4427                                                 int found_ref,
4428                                                 u64 disk_bytenr, u64 bytes)
4429 {
4430         struct list_head *cur = rec->backrefs.next;
4431         struct extent_backref *node;
4432         struct data_backref *back;
4433
4434         while(cur != &rec->backrefs) {
4435                 node = to_extent_backref(cur);
4436                 cur = cur->next;
4437                 if (!node->is_data)
4438                         continue;
4439                 back = to_data_backref(node);
4440                 if (parent > 0) {
4441                         if (!node->full_backref)
4442                                 continue;
4443                         if (parent == back->parent)
4444                                 return back;
4445                 } else {
4446                         if (node->full_backref)
4447                                 continue;
4448                         if (back->root == root && back->owner == owner &&
4449                             back->offset == offset) {
4450                                 if (found_ref && node->found_ref &&
4451                                     (back->bytes != bytes ||
4452                                     back->disk_bytenr != disk_bytenr))
4453                                         continue;
4454                                 return back;
4455                         }
4456                 }
4457         }
4458         return NULL;
4459 }
4460
4461 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4462                                                 u64 parent, u64 root,
4463                                                 u64 owner, u64 offset,
4464                                                 u64 max_size)
4465 {
4466         struct data_backref *ref = malloc(sizeof(*ref));
4467
4468         if (!ref)
4469                 return NULL;
4470         memset(&ref->node, 0, sizeof(ref->node));
4471         ref->node.is_data = 1;
4472
4473         if (parent > 0) {
4474                 ref->parent = parent;
4475                 ref->owner = 0;
4476                 ref->offset = 0;
4477                 ref->node.full_backref = 1;
4478         } else {
4479                 ref->root = root;
4480                 ref->owner = owner;
4481                 ref->offset = offset;
4482                 ref->node.full_backref = 0;
4483         }
4484         ref->bytes = max_size;
4485         ref->found_ref = 0;
4486         ref->num_refs = 0;
4487         list_add_tail(&ref->node.list, &rec->backrefs);
4488         if (max_size > rec->max_size)
4489                 rec->max_size = max_size;
4490         return ref;
4491 }
4492
4493 /* Check if the type of extent matches with its chunk */
4494 static void check_extent_type(struct extent_record *rec)
4495 {
4496         struct btrfs_block_group_cache *bg_cache;
4497
4498         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4499         if (!bg_cache)
4500                 return;
4501
4502         /* data extent, check chunk directly*/
4503         if (!rec->metadata) {
4504                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4505                         rec->wrong_chunk_type = 1;
4506                 return;
4507         }
4508
4509         /* metadata extent, check the obvious case first */
4510         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4511                                  BTRFS_BLOCK_GROUP_METADATA))) {
4512                 rec->wrong_chunk_type = 1;
4513                 return;
4514         }
4515
4516         /*
4517          * Check SYSTEM extent, as it's also marked as metadata, we can only
4518          * make sure it's a SYSTEM extent by its backref
4519          */
4520         if (!list_empty(&rec->backrefs)) {
4521                 struct extent_backref *node;
4522                 struct tree_backref *tback;
4523                 u64 bg_type;
4524
4525                 node = to_extent_backref(rec->backrefs.next);
4526                 if (node->is_data) {
4527                         /* tree block shouldn't have data backref */
4528                         rec->wrong_chunk_type = 1;
4529                         return;
4530                 }
4531                 tback = container_of(node, struct tree_backref, node);
4532
4533                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4534                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4535                 else
4536                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4537                 if (!(bg_cache->flags & bg_type))
4538                         rec->wrong_chunk_type = 1;
4539         }
4540 }
4541
4542 /*
4543  * Allocate a new extent record, fill default values from @tmpl and insert int
4544  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4545  * the cache, otherwise it fails.
4546  */
4547 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4548                 struct extent_record *tmpl)
4549 {
4550         struct extent_record *rec;
4551         int ret = 0;
4552
4553         rec = malloc(sizeof(*rec));
4554         if (!rec)
4555                 return -ENOMEM;
4556         rec->start = tmpl->start;
4557         rec->max_size = tmpl->max_size;
4558         rec->nr = max(tmpl->nr, tmpl->max_size);
4559         rec->found_rec = tmpl->found_rec;
4560         rec->content_checked = tmpl->content_checked;
4561         rec->owner_ref_checked = tmpl->owner_ref_checked;
4562         rec->num_duplicates = 0;
4563         rec->metadata = tmpl->metadata;
4564         rec->flag_block_full_backref = FLAG_UNSET;
4565         rec->bad_full_backref = 0;
4566         rec->crossing_stripes = 0;
4567         rec->wrong_chunk_type = 0;
4568         rec->is_root = tmpl->is_root;
4569         rec->refs = tmpl->refs;
4570         rec->extent_item_refs = tmpl->extent_item_refs;
4571         rec->parent_generation = tmpl->parent_generation;
4572         INIT_LIST_HEAD(&rec->backrefs);
4573         INIT_LIST_HEAD(&rec->dups);
4574         INIT_LIST_HEAD(&rec->list);
4575         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4576         rec->cache.start = tmpl->start;
4577         rec->cache.size = tmpl->nr;
4578         ret = insert_cache_extent(extent_cache, &rec->cache);
4579         BUG_ON(ret);
4580         bytes_used += rec->nr;
4581
4582         if (tmpl->metadata)
4583                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4584                                 global_info->tree_root->nodesize);
4585         check_extent_type(rec);
4586         return ret;
4587 }
4588
4589 /*
4590  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4591  * some are hints:
4592  * - refs              - if found, increase refs
4593  * - is_root           - if found, set
4594  * - content_checked   - if found, set
4595  * - owner_ref_checked - if found, set
4596  *
4597  * If not found, create a new one, initialize and insert.
4598  */
4599 static int add_extent_rec(struct cache_tree *extent_cache,
4600                 struct extent_record *tmpl)
4601 {
4602         struct extent_record *rec;
4603         struct cache_extent *cache;
4604         int ret = 0;
4605         int dup = 0;
4606
4607         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4608         if (cache) {
4609                 rec = container_of(cache, struct extent_record, cache);
4610                 if (tmpl->refs)
4611                         rec->refs++;
4612                 if (rec->nr == 1)
4613                         rec->nr = max(tmpl->nr, tmpl->max_size);
4614
4615                 /*
4616                  * We need to make sure to reset nr to whatever the extent
4617                  * record says was the real size, this way we can compare it to
4618                  * the backrefs.
4619                  */
4620                 if (tmpl->found_rec) {
4621                         if (tmpl->start != rec->start || rec->found_rec) {
4622                                 struct extent_record *tmp;
4623
4624                                 dup = 1;
4625                                 if (list_empty(&rec->list))
4626                                         list_add_tail(&rec->list,
4627                                                       &duplicate_extents);
4628
4629                                 /*
4630                                  * We have to do this song and dance in case we
4631                                  * find an extent record that falls inside of
4632                                  * our current extent record but does not have
4633                                  * the same objectid.
4634                                  */
4635                                 tmp = malloc(sizeof(*tmp));
4636                                 if (!tmp)
4637                                         return -ENOMEM;
4638                                 tmp->start = tmpl->start;
4639                                 tmp->max_size = tmpl->max_size;
4640                                 tmp->nr = tmpl->nr;
4641                                 tmp->found_rec = 1;
4642                                 tmp->metadata = tmpl->metadata;
4643                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4644                                 INIT_LIST_HEAD(&tmp->list);
4645                                 list_add_tail(&tmp->list, &rec->dups);
4646                                 rec->num_duplicates++;
4647                         } else {
4648                                 rec->nr = tmpl->nr;
4649                                 rec->found_rec = 1;
4650                         }
4651                 }
4652
4653                 if (tmpl->extent_item_refs && !dup) {
4654                         if (rec->extent_item_refs) {
4655                                 fprintf(stderr, "block %llu rec "
4656                                         "extent_item_refs %llu, passed %llu\n",
4657                                         (unsigned long long)tmpl->start,
4658                                         (unsigned long long)
4659                                                         rec->extent_item_refs,
4660                                         (unsigned long long)tmpl->extent_item_refs);
4661                         }
4662                         rec->extent_item_refs = tmpl->extent_item_refs;
4663                 }
4664                 if (tmpl->is_root)
4665                         rec->is_root = 1;
4666                 if (tmpl->content_checked)
4667                         rec->content_checked = 1;
4668                 if (tmpl->owner_ref_checked)
4669                         rec->owner_ref_checked = 1;
4670                 memcpy(&rec->parent_key, &tmpl->parent_key,
4671                                 sizeof(tmpl->parent_key));
4672                 if (tmpl->parent_generation)
4673                         rec->parent_generation = tmpl->parent_generation;
4674                 if (rec->max_size < tmpl->max_size)
4675                         rec->max_size = tmpl->max_size;
4676
4677                 /*
4678                  * A metadata extent can't cross stripe_len boundary, otherwise
4679                  * kernel scrub won't be able to handle it.
4680                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4681                  * it.
4682                  */
4683                 if (tmpl->metadata)
4684                         rec->crossing_stripes = check_crossing_stripes(
4685                                 rec->start, global_info->tree_root->nodesize);
4686                 check_extent_type(rec);
4687                 maybe_free_extent_rec(extent_cache, rec);
4688                 return ret;
4689         }
4690
4691         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4692
4693         return ret;
4694 }
4695
4696 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4697                             u64 parent, u64 root, int found_ref)
4698 {
4699         struct extent_record *rec;
4700         struct tree_backref *back;
4701         struct cache_extent *cache;
4702
4703         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4704         if (!cache) {
4705                 struct extent_record tmpl;
4706
4707                 memset(&tmpl, 0, sizeof(tmpl));
4708                 tmpl.start = bytenr;
4709                 tmpl.nr = 1;
4710                 tmpl.metadata = 1;
4711
4712                 add_extent_rec_nolookup(extent_cache, &tmpl);
4713
4714                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4715                 if (!cache)
4716                         abort();
4717         }
4718
4719         rec = container_of(cache, struct extent_record, cache);
4720         if (rec->start != bytenr) {
4721                 abort();
4722         }
4723
4724         back = find_tree_backref(rec, parent, root);
4725         if (!back) {
4726                 back = alloc_tree_backref(rec, parent, root);
4727                 BUG_ON(!back);
4728         }
4729
4730         if (found_ref) {
4731                 if (back->node.found_ref) {
4732                         fprintf(stderr, "Extent back ref already exists "
4733                                 "for %llu parent %llu root %llu \n",
4734                                 (unsigned long long)bytenr,
4735                                 (unsigned long long)parent,
4736                                 (unsigned long long)root);
4737                 }
4738                 back->node.found_ref = 1;
4739         } else {
4740                 if (back->node.found_extent_tree) {
4741                         fprintf(stderr, "Extent back ref already exists "
4742                                 "for %llu parent %llu root %llu \n",
4743                                 (unsigned long long)bytenr,
4744                                 (unsigned long long)parent,
4745                                 (unsigned long long)root);
4746                 }
4747                 back->node.found_extent_tree = 1;
4748         }
4749         check_extent_type(rec);
4750         maybe_free_extent_rec(extent_cache, rec);
4751         return 0;
4752 }
4753
4754 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4755                             u64 parent, u64 root, u64 owner, u64 offset,
4756                             u32 num_refs, int found_ref, u64 max_size)
4757 {
4758         struct extent_record *rec;
4759         struct data_backref *back;
4760         struct cache_extent *cache;
4761
4762         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4763         if (!cache) {
4764                 struct extent_record tmpl;
4765
4766                 memset(&tmpl, 0, sizeof(tmpl));
4767                 tmpl.start = bytenr;
4768                 tmpl.nr = 1;
4769                 tmpl.max_size = max_size;
4770
4771                 add_extent_rec_nolookup(extent_cache, &tmpl);
4772
4773                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4774                 if (!cache)
4775                         abort();
4776         }
4777
4778         rec = container_of(cache, struct extent_record, cache);
4779         if (rec->max_size < max_size)
4780                 rec->max_size = max_size;
4781
4782         /*
4783          * If found_ref is set then max_size is the real size and must match the
4784          * existing refs.  So if we have already found a ref then we need to
4785          * make sure that this ref matches the existing one, otherwise we need
4786          * to add a new backref so we can notice that the backrefs don't match
4787          * and we need to figure out who is telling the truth.  This is to
4788          * account for that awful fsync bug I introduced where we'd end up with
4789          * a btrfs_file_extent_item that would have its length include multiple
4790          * prealloc extents or point inside of a prealloc extent.
4791          */
4792         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4793                                  bytenr, max_size);
4794         if (!back) {
4795                 back = alloc_data_backref(rec, parent, root, owner, offset,
4796                                           max_size);
4797                 BUG_ON(!back);
4798         }
4799
4800         if (found_ref) {
4801                 BUG_ON(num_refs != 1);
4802                 if (back->node.found_ref)
4803                         BUG_ON(back->bytes != max_size);
4804                 back->node.found_ref = 1;
4805                 back->found_ref += 1;
4806                 back->bytes = max_size;
4807                 back->disk_bytenr = bytenr;
4808                 rec->refs += 1;
4809                 rec->content_checked = 1;
4810                 rec->owner_ref_checked = 1;
4811         } else {
4812                 if (back->node.found_extent_tree) {
4813                         fprintf(stderr, "Extent back ref already exists "
4814                                 "for %llu parent %llu root %llu "
4815                                 "owner %llu offset %llu num_refs %lu\n",
4816                                 (unsigned long long)bytenr,
4817                                 (unsigned long long)parent,
4818                                 (unsigned long long)root,
4819                                 (unsigned long long)owner,
4820                                 (unsigned long long)offset,
4821                                 (unsigned long)num_refs);
4822                 }
4823                 back->num_refs = num_refs;
4824                 back->node.found_extent_tree = 1;
4825         }
4826         maybe_free_extent_rec(extent_cache, rec);
4827         return 0;
4828 }
4829
4830 static int add_pending(struct cache_tree *pending,
4831                        struct cache_tree *seen, u64 bytenr, u32 size)
4832 {
4833         int ret;
4834         ret = add_cache_extent(seen, bytenr, size);
4835         if (ret)
4836                 return ret;
4837         add_cache_extent(pending, bytenr, size);
4838         return 0;
4839 }
4840
4841 static int pick_next_pending(struct cache_tree *pending,
4842                         struct cache_tree *reada,
4843                         struct cache_tree *nodes,
4844                         u64 last, struct block_info *bits, int bits_nr,
4845                         int *reada_bits)
4846 {
4847         unsigned long node_start = last;
4848         struct cache_extent *cache;
4849         int ret;
4850
4851         cache = search_cache_extent(reada, 0);
4852         if (cache) {
4853                 bits[0].start = cache->start;
4854                 bits[0].size = cache->size;
4855                 *reada_bits = 1;
4856                 return 1;
4857         }
4858         *reada_bits = 0;
4859         if (node_start > 32768)
4860                 node_start -= 32768;
4861
4862         cache = search_cache_extent(nodes, node_start);
4863         if (!cache)
4864                 cache = search_cache_extent(nodes, 0);
4865
4866         if (!cache) {
4867                  cache = search_cache_extent(pending, 0);
4868                  if (!cache)
4869                          return 0;
4870                  ret = 0;
4871                  do {
4872                          bits[ret].start = cache->start;
4873                          bits[ret].size = cache->size;
4874                          cache = next_cache_extent(cache);
4875                          ret++;
4876                  } while (cache && ret < bits_nr);
4877                  return ret;
4878         }
4879
4880         ret = 0;
4881         do {
4882                 bits[ret].start = cache->start;
4883                 bits[ret].size = cache->size;
4884                 cache = next_cache_extent(cache);
4885                 ret++;
4886         } while (cache && ret < bits_nr);
4887
4888         if (bits_nr - ret > 8) {
4889                 u64 lookup = bits[0].start + bits[0].size;
4890                 struct cache_extent *next;
4891                 next = search_cache_extent(pending, lookup);
4892                 while(next) {
4893                         if (next->start - lookup > 32768)
4894                                 break;
4895                         bits[ret].start = next->start;
4896                         bits[ret].size = next->size;
4897                         lookup = next->start + next->size;
4898                         ret++;
4899                         if (ret == bits_nr)
4900                                 break;
4901                         next = next_cache_extent(next);
4902                         if (!next)
4903                                 break;
4904                 }
4905         }
4906         return ret;
4907 }
4908
4909 static void free_chunk_record(struct cache_extent *cache)
4910 {
4911         struct chunk_record *rec;
4912
4913         rec = container_of(cache, struct chunk_record, cache);
4914         list_del_init(&rec->list);
4915         list_del_init(&rec->dextents);
4916         free(rec);
4917 }
4918
4919 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4920 {
4921         cache_tree_free_extents(chunk_cache, free_chunk_record);
4922 }
4923
4924 static void free_device_record(struct rb_node *node)
4925 {
4926         struct device_record *rec;
4927
4928         rec = container_of(node, struct device_record, node);
4929         free(rec);
4930 }
4931
4932 FREE_RB_BASED_TREE(device_cache, free_device_record);
4933
4934 int insert_block_group_record(struct block_group_tree *tree,
4935                               struct block_group_record *bg_rec)
4936 {
4937         int ret;
4938
4939         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4940         if (ret)
4941                 return ret;
4942
4943         list_add_tail(&bg_rec->list, &tree->block_groups);
4944         return 0;
4945 }
4946
4947 static void free_block_group_record(struct cache_extent *cache)
4948 {
4949         struct block_group_record *rec;
4950
4951         rec = container_of(cache, struct block_group_record, cache);
4952         list_del_init(&rec->list);
4953         free(rec);
4954 }
4955
4956 void free_block_group_tree(struct block_group_tree *tree)
4957 {
4958         cache_tree_free_extents(&tree->tree, free_block_group_record);
4959 }
4960
4961 int insert_device_extent_record(struct device_extent_tree *tree,
4962                                 struct device_extent_record *de_rec)
4963 {
4964         int ret;
4965
4966         /*
4967          * Device extent is a bit different from the other extents, because
4968          * the extents which belong to the different devices may have the
4969          * same start and size, so we need use the special extent cache
4970          * search/insert functions.
4971          */
4972         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4973         if (ret)
4974                 return ret;
4975
4976         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4977         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4978         return 0;
4979 }
4980
4981 static void free_device_extent_record(struct cache_extent *cache)
4982 {
4983         struct device_extent_record *rec;
4984
4985         rec = container_of(cache, struct device_extent_record, cache);
4986         if (!list_empty(&rec->chunk_list))
4987                 list_del_init(&rec->chunk_list);
4988         if (!list_empty(&rec->device_list))
4989                 list_del_init(&rec->device_list);
4990         free(rec);
4991 }
4992
4993 void free_device_extent_tree(struct device_extent_tree *tree)
4994 {
4995         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4996 }
4997
4998 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4999 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5000                                  struct extent_buffer *leaf, int slot)
5001 {
5002         struct btrfs_extent_ref_v0 *ref0;
5003         struct btrfs_key key;
5004
5005         btrfs_item_key_to_cpu(leaf, &key, slot);
5006         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5007         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5008                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5009         } else {
5010                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5011                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
5012         }
5013         return 0;
5014 }
5015 #endif
5016
5017 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5018                                             struct btrfs_key *key,
5019                                             int slot)
5020 {
5021         struct btrfs_chunk *ptr;
5022         struct chunk_record *rec;
5023         int num_stripes, i;
5024
5025         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5026         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5027
5028         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5029         if (!rec) {
5030                 fprintf(stderr, "memory allocation failed\n");
5031                 exit(-1);
5032         }
5033
5034         INIT_LIST_HEAD(&rec->list);
5035         INIT_LIST_HEAD(&rec->dextents);
5036         rec->bg_rec = NULL;
5037
5038         rec->cache.start = key->offset;
5039         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5040
5041         rec->generation = btrfs_header_generation(leaf);
5042
5043         rec->objectid = key->objectid;
5044         rec->type = key->type;
5045         rec->offset = key->offset;
5046
5047         rec->length = rec->cache.size;
5048         rec->owner = btrfs_chunk_owner(leaf, ptr);
5049         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5050         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5051         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5052         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5053         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5054         rec->num_stripes = num_stripes;
5055         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5056
5057         for (i = 0; i < rec->num_stripes; ++i) {
5058                 rec->stripes[i].devid =
5059                         btrfs_stripe_devid_nr(leaf, ptr, i);
5060                 rec->stripes[i].offset =
5061                         btrfs_stripe_offset_nr(leaf, ptr, i);
5062                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5063                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5064                                 BTRFS_UUID_SIZE);
5065         }
5066
5067         return rec;
5068 }
5069
5070 static int process_chunk_item(struct cache_tree *chunk_cache,
5071                               struct btrfs_key *key, struct extent_buffer *eb,
5072                               int slot)
5073 {
5074         struct chunk_record *rec;
5075         int ret = 0;
5076
5077         rec = btrfs_new_chunk_record(eb, key, slot);
5078         ret = insert_cache_extent(chunk_cache, &rec->cache);
5079         if (ret) {
5080                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5081                         rec->offset, rec->length);
5082                 free(rec);
5083         }
5084
5085         return ret;
5086 }
5087
5088 static int process_device_item(struct rb_root *dev_cache,
5089                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5090 {
5091         struct btrfs_dev_item *ptr;
5092         struct device_record *rec;
5093         int ret = 0;
5094
5095         ptr = btrfs_item_ptr(eb,
5096                 slot, struct btrfs_dev_item);
5097
5098         rec = malloc(sizeof(*rec));
5099         if (!rec) {
5100                 fprintf(stderr, "memory allocation failed\n");
5101                 return -ENOMEM;
5102         }
5103
5104         rec->devid = key->offset;
5105         rec->generation = btrfs_header_generation(eb);
5106
5107         rec->objectid = key->objectid;
5108         rec->type = key->type;
5109         rec->offset = key->offset;
5110
5111         rec->devid = btrfs_device_id(eb, ptr);
5112         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5113         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5114
5115         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5116         if (ret) {
5117                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5118                 free(rec);
5119         }
5120
5121         return ret;
5122 }
5123
5124 struct block_group_record *
5125 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5126                              int slot)
5127 {
5128         struct btrfs_block_group_item *ptr;
5129         struct block_group_record *rec;
5130
5131         rec = calloc(1, sizeof(*rec));
5132         if (!rec) {
5133                 fprintf(stderr, "memory allocation failed\n");
5134                 exit(-1);
5135         }
5136
5137         rec->cache.start = key->objectid;
5138         rec->cache.size = key->offset;
5139
5140         rec->generation = btrfs_header_generation(leaf);
5141
5142         rec->objectid = key->objectid;
5143         rec->type = key->type;
5144         rec->offset = key->offset;
5145
5146         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5147         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5148
5149         INIT_LIST_HEAD(&rec->list);
5150
5151         return rec;
5152 }
5153
5154 static int process_block_group_item(struct block_group_tree *block_group_cache,
5155                                     struct btrfs_key *key,
5156                                     struct extent_buffer *eb, int slot)
5157 {
5158         struct block_group_record *rec;
5159         int ret = 0;
5160
5161         rec = btrfs_new_block_group_record(eb, key, slot);
5162         ret = insert_block_group_record(block_group_cache, rec);
5163         if (ret) {
5164                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5165                         rec->objectid, rec->offset);
5166                 free(rec);
5167         }
5168
5169         return ret;
5170 }
5171
5172 struct device_extent_record *
5173 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5174                                struct btrfs_key *key, int slot)
5175 {
5176         struct device_extent_record *rec;
5177         struct btrfs_dev_extent *ptr;
5178
5179         rec = calloc(1, sizeof(*rec));
5180         if (!rec) {
5181                 fprintf(stderr, "memory allocation failed\n");
5182                 exit(-1);
5183         }
5184
5185         rec->cache.objectid = key->objectid;
5186         rec->cache.start = key->offset;
5187
5188         rec->generation = btrfs_header_generation(leaf);
5189
5190         rec->objectid = key->objectid;
5191         rec->type = key->type;
5192         rec->offset = key->offset;
5193
5194         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5195         rec->chunk_objecteid =
5196                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5197         rec->chunk_offset =
5198                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5199         rec->length = btrfs_dev_extent_length(leaf, ptr);
5200         rec->cache.size = rec->length;
5201
5202         INIT_LIST_HEAD(&rec->chunk_list);
5203         INIT_LIST_HEAD(&rec->device_list);
5204
5205         return rec;
5206 }
5207
5208 static int
5209 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5210                            struct btrfs_key *key, struct extent_buffer *eb,
5211                            int slot)
5212 {
5213         struct device_extent_record *rec;
5214         int ret;
5215
5216         rec = btrfs_new_device_extent_record(eb, key, slot);
5217         ret = insert_device_extent_record(dev_extent_cache, rec);
5218         if (ret) {
5219                 fprintf(stderr,
5220                         "Device extent[%llu, %llu, %llu] existed.\n",
5221                         rec->objectid, rec->offset, rec->length);
5222                 free(rec);
5223         }
5224
5225         return ret;
5226 }
5227
5228 static int process_extent_item(struct btrfs_root *root,
5229                                struct cache_tree *extent_cache,
5230                                struct extent_buffer *eb, int slot)
5231 {
5232         struct btrfs_extent_item *ei;
5233         struct btrfs_extent_inline_ref *iref;
5234         struct btrfs_extent_data_ref *dref;
5235         struct btrfs_shared_data_ref *sref;
5236         struct btrfs_key key;
5237         struct extent_record tmpl;
5238         unsigned long end;
5239         unsigned long ptr;
5240         int type;
5241         u32 item_size = btrfs_item_size_nr(eb, slot);
5242         u64 refs = 0;
5243         u64 offset;
5244         u64 num_bytes;
5245         int metadata = 0;
5246
5247         btrfs_item_key_to_cpu(eb, &key, slot);
5248
5249         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5250                 metadata = 1;
5251                 num_bytes = root->nodesize;
5252         } else {
5253                 num_bytes = key.offset;
5254         }
5255
5256         if (item_size < sizeof(*ei)) {
5257 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5258                 struct btrfs_extent_item_v0 *ei0;
5259                 BUG_ON(item_size != sizeof(*ei0));
5260                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5261                 refs = btrfs_extent_refs_v0(eb, ei0);
5262 #else
5263                 BUG();
5264 #endif
5265                 memset(&tmpl, 0, sizeof(tmpl));
5266                 tmpl.start = key.objectid;
5267                 tmpl.nr = num_bytes;
5268                 tmpl.extent_item_refs = refs;
5269                 tmpl.metadata = metadata;
5270                 tmpl.found_rec = 1;
5271                 tmpl.max_size = num_bytes;
5272
5273                 return add_extent_rec(extent_cache, &tmpl);
5274         }
5275
5276         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5277         refs = btrfs_extent_refs(eb, ei);
5278         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5279                 metadata = 1;
5280         else
5281                 metadata = 0;
5282
5283         memset(&tmpl, 0, sizeof(tmpl));
5284         tmpl.start = key.objectid;
5285         tmpl.nr = num_bytes;
5286         tmpl.extent_item_refs = refs;
5287         tmpl.metadata = metadata;
5288         tmpl.found_rec = 1;
5289         tmpl.max_size = num_bytes;
5290         add_extent_rec(extent_cache, &tmpl);
5291
5292         ptr = (unsigned long)(ei + 1);
5293         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5294             key.type == BTRFS_EXTENT_ITEM_KEY)
5295                 ptr += sizeof(struct btrfs_tree_block_info);
5296
5297         end = (unsigned long)ei + item_size;
5298         while (ptr < end) {
5299                 iref = (struct btrfs_extent_inline_ref *)ptr;
5300                 type = btrfs_extent_inline_ref_type(eb, iref);
5301                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5302                 switch (type) {
5303                 case BTRFS_TREE_BLOCK_REF_KEY:
5304                         add_tree_backref(extent_cache, key.objectid,
5305                                          0, offset, 0);
5306                         break;
5307                 case BTRFS_SHARED_BLOCK_REF_KEY:
5308                         add_tree_backref(extent_cache, key.objectid,
5309                                          offset, 0, 0);
5310                         break;
5311                 case BTRFS_EXTENT_DATA_REF_KEY:
5312                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5313                         add_data_backref(extent_cache, key.objectid, 0,
5314                                         btrfs_extent_data_ref_root(eb, dref),
5315                                         btrfs_extent_data_ref_objectid(eb,
5316                                                                        dref),
5317                                         btrfs_extent_data_ref_offset(eb, dref),
5318                                         btrfs_extent_data_ref_count(eb, dref),
5319                                         0, num_bytes);
5320                         break;
5321                 case BTRFS_SHARED_DATA_REF_KEY:
5322                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5323                         add_data_backref(extent_cache, key.objectid, offset,
5324                                         0, 0, 0,
5325                                         btrfs_shared_data_ref_count(eb, sref),
5326                                         0, num_bytes);
5327                         break;
5328                 default:
5329                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5330                                 key.objectid, key.type, num_bytes);
5331                         goto out;
5332                 }
5333                 ptr += btrfs_extent_inline_ref_size(type);
5334         }
5335         WARN_ON(ptr > end);
5336 out:
5337         return 0;
5338 }
5339
5340 static int check_cache_range(struct btrfs_root *root,
5341                              struct btrfs_block_group_cache *cache,
5342                              u64 offset, u64 bytes)
5343 {
5344         struct btrfs_free_space *entry;
5345         u64 *logical;
5346         u64 bytenr;
5347         int stripe_len;
5348         int i, nr, ret;
5349
5350         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5351                 bytenr = btrfs_sb_offset(i);
5352                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5353                                        cache->key.objectid, bytenr, 0,
5354                                        &logical, &nr, &stripe_len);
5355                 if (ret)
5356                         return ret;
5357
5358                 while (nr--) {
5359                         if (logical[nr] + stripe_len <= offset)
5360                                 continue;
5361                         if (offset + bytes <= logical[nr])
5362                                 continue;
5363                         if (logical[nr] == offset) {
5364                                 if (stripe_len >= bytes) {
5365                                         kfree(logical);
5366                                         return 0;
5367                                 }
5368                                 bytes -= stripe_len;
5369                                 offset += stripe_len;
5370                         } else if (logical[nr] < offset) {
5371                                 if (logical[nr] + stripe_len >=
5372                                     offset + bytes) {
5373                                         kfree(logical);
5374                                         return 0;
5375                                 }
5376                                 bytes = (offset + bytes) -
5377                                         (logical[nr] + stripe_len);
5378                                 offset = logical[nr] + stripe_len;
5379                         } else {
5380                                 /*
5381                                  * Could be tricky, the super may land in the
5382                                  * middle of the area we're checking.  First
5383                                  * check the easiest case, it's at the end.
5384                                  */
5385                                 if (logical[nr] + stripe_len >=
5386                                     bytes + offset) {
5387                                         bytes = logical[nr] - offset;
5388                                         continue;
5389                                 }
5390
5391                                 /* Check the left side */
5392                                 ret = check_cache_range(root, cache,
5393                                                         offset,
5394                                                         logical[nr] - offset);
5395                                 if (ret) {
5396                                         kfree(logical);
5397                                         return ret;
5398                                 }
5399
5400                                 /* Now we continue with the right side */
5401                                 bytes = (offset + bytes) -
5402                                         (logical[nr] + stripe_len);
5403                                 offset = logical[nr] + stripe_len;
5404                         }
5405                 }
5406
5407                 kfree(logical);
5408         }
5409
5410         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5411         if (!entry) {
5412                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5413                         offset, offset+bytes);
5414                 return -EINVAL;
5415         }
5416
5417         if (entry->offset != offset) {
5418                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5419                         entry->offset);
5420                 return -EINVAL;
5421         }
5422
5423         if (entry->bytes != bytes) {
5424                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5425                         bytes, entry->bytes, offset);
5426                 return -EINVAL;
5427         }
5428
5429         unlink_free_space(cache->free_space_ctl, entry);
5430         free(entry);
5431         return 0;
5432 }
5433
5434 static int verify_space_cache(struct btrfs_root *root,
5435                               struct btrfs_block_group_cache *cache)
5436 {
5437         struct btrfs_path *path;
5438         struct extent_buffer *leaf;
5439         struct btrfs_key key;
5440         u64 last;
5441         int ret = 0;
5442
5443         path = btrfs_alloc_path();
5444         if (!path)
5445                 return -ENOMEM;
5446
5447         root = root->fs_info->extent_root;
5448
5449         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5450
5451         key.objectid = last;
5452         key.offset = 0;
5453         key.type = BTRFS_EXTENT_ITEM_KEY;
5454
5455         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5456         if (ret < 0)
5457                 goto out;
5458         ret = 0;
5459         while (1) {
5460                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5461                         ret = btrfs_next_leaf(root, path);
5462                         if (ret < 0)
5463                                 goto out;
5464                         if (ret > 0) {
5465                                 ret = 0;
5466                                 break;
5467                         }
5468                 }
5469                 leaf = path->nodes[0];
5470                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5471                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5472                         break;
5473                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5474                     key.type != BTRFS_METADATA_ITEM_KEY) {
5475                         path->slots[0]++;
5476                         continue;
5477                 }
5478
5479                 if (last == key.objectid) {
5480                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5481                                 last = key.objectid + key.offset;
5482                         else
5483                                 last = key.objectid + root->nodesize;
5484                         path->slots[0]++;
5485                         continue;
5486                 }
5487
5488                 ret = check_cache_range(root, cache, last,
5489                                         key.objectid - last);
5490                 if (ret)
5491                         break;
5492                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5493                         last = key.objectid + key.offset;
5494                 else
5495                         last = key.objectid + root->nodesize;
5496                 path->slots[0]++;
5497         }
5498
5499         if (last < cache->key.objectid + cache->key.offset)
5500                 ret = check_cache_range(root, cache, last,
5501                                         cache->key.objectid +
5502                                         cache->key.offset - last);
5503
5504 out:
5505         btrfs_free_path(path);
5506
5507         if (!ret &&
5508             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5509                 fprintf(stderr, "There are still entries left in the space "
5510                         "cache\n");
5511                 ret = -EINVAL;
5512         }
5513
5514         return ret;
5515 }
5516
5517 static int check_space_cache(struct btrfs_root *root)
5518 {
5519         struct btrfs_block_group_cache *cache;
5520         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5521         int ret;
5522         int error = 0;
5523
5524         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5525             btrfs_super_generation(root->fs_info->super_copy) !=
5526             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5527                 printf("cache and super generation don't match, space cache "
5528                        "will be invalidated\n");
5529                 return 0;
5530         }
5531
5532         if (ctx.progress_enabled) {
5533                 ctx.tp = TASK_FREE_SPACE;
5534                 task_start(ctx.info);
5535         }
5536
5537         while (1) {
5538                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5539                 if (!cache)
5540                         break;
5541
5542                 start = cache->key.objectid + cache->key.offset;
5543                 if (!cache->free_space_ctl) {
5544                         if (btrfs_init_free_space_ctl(cache,
5545                                                       root->sectorsize)) {
5546                                 ret = -ENOMEM;
5547                                 break;
5548                         }
5549                 } else {
5550                         btrfs_remove_free_space_cache(cache);
5551                 }
5552
5553                 if (btrfs_fs_compat_ro(root->fs_info,
5554                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5555                         ret = exclude_super_stripes(root, cache);
5556                         if (ret) {
5557                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5558                                         strerror(-ret));
5559                                 error++;
5560                                 continue;
5561                         }
5562                         ret = load_free_space_tree(root->fs_info, cache);
5563                         free_excluded_extents(root, cache);
5564                         if (ret < 0) {
5565                                 fprintf(stderr, "could not load free space tree: %s\n",
5566                                         strerror(-ret));
5567                                 error++;
5568                                 continue;
5569                         }
5570                         error += ret;
5571                 } else {
5572                         ret = load_free_space_cache(root->fs_info, cache);
5573                         if (!ret)
5574                                 continue;
5575                 }
5576
5577                 ret = verify_space_cache(root, cache);
5578                 if (ret) {
5579                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5580                                 cache->key.objectid);
5581                         error++;
5582                 }
5583         }
5584
5585         task_stop(ctx.info);
5586
5587         return error ? -EINVAL : 0;
5588 }
5589
5590 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5591                         u64 num_bytes, unsigned long leaf_offset,
5592                         struct extent_buffer *eb) {
5593
5594         u64 offset = 0;
5595         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5596         char *data;
5597         unsigned long csum_offset;
5598         u32 csum;
5599         u32 csum_expected;
5600         u64 read_len;
5601         u64 data_checked = 0;
5602         u64 tmp;
5603         int ret = 0;
5604         int mirror;
5605         int num_copies;
5606
5607         if (num_bytes % root->sectorsize)
5608                 return -EINVAL;
5609
5610         data = malloc(num_bytes);
5611         if (!data)
5612                 return -ENOMEM;
5613
5614         while (offset < num_bytes) {
5615                 mirror = 0;
5616 again:
5617                 read_len = num_bytes - offset;
5618                 /* read as much space once a time */
5619                 ret = read_extent_data(root, data + offset,
5620                                 bytenr + offset, &read_len, mirror);
5621                 if (ret)
5622                         goto out;
5623                 data_checked = 0;
5624                 /* verify every 4k data's checksum */
5625                 while (data_checked < read_len) {
5626                         csum = ~(u32)0;
5627                         tmp = offset + data_checked;
5628
5629                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5630                                                csum, root->sectorsize);
5631                         btrfs_csum_final(csum, (char *)&csum);
5632
5633                         csum_offset = leaf_offset +
5634                                  tmp / root->sectorsize * csum_size;
5635                         read_extent_buffer(eb, (char *)&csum_expected,
5636                                            csum_offset, csum_size);
5637                         /* try another mirror */
5638                         if (csum != csum_expected) {
5639                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5640                                                 mirror, bytenr + tmp,
5641                                                 csum, csum_expected);
5642                                 num_copies = btrfs_num_copies(
5643                                                 &root->fs_info->mapping_tree,
5644                                                 bytenr, num_bytes);
5645                                 if (mirror < num_copies - 1) {
5646                                         mirror += 1;
5647                                         goto again;
5648                                 }
5649                         }
5650                         data_checked += root->sectorsize;
5651                 }
5652                 offset += read_len;
5653         }
5654 out:
5655         free(data);
5656         return ret;
5657 }
5658
5659 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5660                                u64 num_bytes)
5661 {
5662         struct btrfs_path *path;
5663         struct extent_buffer *leaf;
5664         struct btrfs_key key;
5665         int ret;
5666
5667         path = btrfs_alloc_path();
5668         if (!path) {
5669                 fprintf(stderr, "Error allocating path\n");
5670                 return -ENOMEM;
5671         }
5672
5673         key.objectid = bytenr;
5674         key.type = BTRFS_EXTENT_ITEM_KEY;
5675         key.offset = (u64)-1;
5676
5677 again:
5678         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5679                                 0, 0);
5680         if (ret < 0) {
5681                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5682                 btrfs_free_path(path);
5683                 return ret;
5684         } else if (ret) {
5685                 if (path->slots[0] > 0) {
5686                         path->slots[0]--;
5687                 } else {
5688                         ret = btrfs_prev_leaf(root, path);
5689                         if (ret < 0) {
5690                                 goto out;
5691                         } else if (ret > 0) {
5692                                 ret = 0;
5693                                 goto out;
5694                         }
5695                 }
5696         }
5697
5698         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5699
5700         /*
5701          * Block group items come before extent items if they have the same
5702          * bytenr, so walk back one more just in case.  Dear future traveller,
5703          * first congrats on mastering time travel.  Now if it's not too much
5704          * trouble could you go back to 2006 and tell Chris to make the
5705          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5706          * EXTENT_ITEM_KEY please?
5707          */
5708         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5709                 if (path->slots[0] > 0) {
5710                         path->slots[0]--;
5711                 } else {
5712                         ret = btrfs_prev_leaf(root, path);
5713                         if (ret < 0) {
5714                                 goto out;
5715                         } else if (ret > 0) {
5716                                 ret = 0;
5717                                 goto out;
5718                         }
5719                 }
5720                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5721         }
5722
5723         while (num_bytes) {
5724                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5725                         ret = btrfs_next_leaf(root, path);
5726                         if (ret < 0) {
5727                                 fprintf(stderr, "Error going to next leaf "
5728                                         "%d\n", ret);
5729                                 btrfs_free_path(path);
5730                                 return ret;
5731                         } else if (ret) {
5732                                 break;
5733                         }
5734                 }
5735                 leaf = path->nodes[0];
5736                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5737                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5738                         path->slots[0]++;
5739                         continue;
5740                 }
5741                 if (key.objectid + key.offset < bytenr) {
5742                         path->slots[0]++;
5743                         continue;
5744                 }
5745                 if (key.objectid > bytenr + num_bytes)
5746                         break;
5747
5748                 if (key.objectid == bytenr) {
5749                         if (key.offset >= num_bytes) {
5750                                 num_bytes = 0;
5751                                 break;
5752                         }
5753                         num_bytes -= key.offset;
5754                         bytenr += key.offset;
5755                 } else if (key.objectid < bytenr) {
5756                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5757                                 num_bytes = 0;
5758                                 break;
5759                         }
5760                         num_bytes = (bytenr + num_bytes) -
5761                                 (key.objectid + key.offset);
5762                         bytenr = key.objectid + key.offset;
5763                 } else {
5764                         if (key.objectid + key.offset < bytenr + num_bytes) {
5765                                 u64 new_start = key.objectid + key.offset;
5766                                 u64 new_bytes = bytenr + num_bytes - new_start;
5767
5768                                 /*
5769                                  * Weird case, the extent is in the middle of
5770                                  * our range, we'll have to search one side
5771                                  * and then the other.  Not sure if this happens
5772                                  * in real life, but no harm in coding it up
5773                                  * anyway just in case.
5774                                  */
5775                                 btrfs_release_path(path);
5776                                 ret = check_extent_exists(root, new_start,
5777                                                           new_bytes);
5778                                 if (ret) {
5779                                         fprintf(stderr, "Right section didn't "
5780                                                 "have a record\n");
5781                                         break;
5782                                 }
5783                                 num_bytes = key.objectid - bytenr;
5784                                 goto again;
5785                         }
5786                         num_bytes = key.objectid - bytenr;
5787                 }
5788                 path->slots[0]++;
5789         }
5790         ret = 0;
5791
5792 out:
5793         if (num_bytes && !ret) {
5794                 fprintf(stderr, "There are no extents for csum range "
5795                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5796                 ret = 1;
5797         }
5798
5799         btrfs_free_path(path);
5800         return ret;
5801 }
5802
5803 static int check_csums(struct btrfs_root *root)
5804 {
5805         struct btrfs_path *path;
5806         struct extent_buffer *leaf;
5807         struct btrfs_key key;
5808         u64 offset = 0, num_bytes = 0;
5809         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5810         int errors = 0;
5811         int ret;
5812         u64 data_len;
5813         unsigned long leaf_offset;
5814
5815         root = root->fs_info->csum_root;
5816         if (!extent_buffer_uptodate(root->node)) {
5817                 fprintf(stderr, "No valid csum tree found\n");
5818                 return -ENOENT;
5819         }
5820
5821         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5822         key.type = BTRFS_EXTENT_CSUM_KEY;
5823         key.offset = 0;
5824
5825         path = btrfs_alloc_path();
5826         if (!path)
5827                 return -ENOMEM;
5828
5829         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5830         if (ret < 0) {
5831                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5832                 btrfs_free_path(path);
5833                 return ret;
5834         }
5835
5836         if (ret > 0 && path->slots[0])
5837                 path->slots[0]--;
5838         ret = 0;
5839
5840         while (1) {
5841                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5842                         ret = btrfs_next_leaf(root, path);
5843                         if (ret < 0) {
5844                                 fprintf(stderr, "Error going to next leaf "
5845                                         "%d\n", ret);
5846                                 break;
5847                         }
5848                         if (ret)
5849                                 break;
5850                 }
5851                 leaf = path->nodes[0];
5852
5853                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5854                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5855                         path->slots[0]++;
5856                         continue;
5857                 }
5858
5859                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5860                               csum_size) * root->sectorsize;
5861                 if (!check_data_csum)
5862                         goto skip_csum_check;
5863                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5864                 ret = check_extent_csums(root, key.offset, data_len,
5865                                          leaf_offset, leaf);
5866                 if (ret)
5867                         break;
5868 skip_csum_check:
5869                 if (!num_bytes) {
5870                         offset = key.offset;
5871                 } else if (key.offset != offset + num_bytes) {
5872                         ret = check_extent_exists(root, offset, num_bytes);
5873                         if (ret) {
5874                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5875                                         "there is no extent record\n",
5876                                         offset, offset+num_bytes);
5877                                 errors++;
5878                         }
5879                         offset = key.offset;
5880                         num_bytes = 0;
5881                 }
5882                 num_bytes += data_len;
5883                 path->slots[0]++;
5884         }
5885
5886         btrfs_free_path(path);
5887         return errors;
5888 }
5889
5890 static int is_dropped_key(struct btrfs_key *key,
5891                           struct btrfs_key *drop_key) {
5892         if (key->objectid < drop_key->objectid)
5893                 return 1;
5894         else if (key->objectid == drop_key->objectid) {
5895                 if (key->type < drop_key->type)
5896                         return 1;
5897                 else if (key->type == drop_key->type) {
5898                         if (key->offset < drop_key->offset)
5899                                 return 1;
5900                 }
5901         }
5902         return 0;
5903 }
5904
5905 /*
5906  * Here are the rules for FULL_BACKREF.
5907  *
5908  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5909  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5910  *      FULL_BACKREF set.
5911  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5912  *    if it happened after the relocation occurred since we'll have dropped the
5913  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5914  *    have no real way to know for sure.
5915  *
5916  * We process the blocks one root at a time, and we start from the lowest root
5917  * objectid and go to the highest.  So we can just lookup the owner backref for
5918  * the record and if we don't find it then we know it doesn't exist and we have
5919  * a FULL BACKREF.
5920  *
5921  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5922  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5923  * be set or not and then we can check later once we've gathered all the refs.
5924  */
5925 static int calc_extent_flag(struct btrfs_root *root,
5926                            struct cache_tree *extent_cache,
5927                            struct extent_buffer *buf,
5928                            struct root_item_record *ri,
5929                            u64 *flags)
5930 {
5931         struct extent_record *rec;
5932         struct cache_extent *cache;
5933         struct tree_backref *tback;
5934         u64 owner = 0;
5935
5936         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5937         /* we have added this extent before */
5938         BUG_ON(!cache);
5939         rec = container_of(cache, struct extent_record, cache);
5940
5941         /*
5942          * Except file/reloc tree, we can not have
5943          * FULL BACKREF MODE
5944          */
5945         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5946                 goto normal;
5947         /*
5948          * root node
5949          */
5950         if (buf->start == ri->bytenr)
5951                 goto normal;
5952
5953         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5954                 goto full_backref;
5955
5956         owner = btrfs_header_owner(buf);
5957         if (owner == ri->objectid)
5958                 goto normal;
5959
5960         tback = find_tree_backref(rec, 0, owner);
5961         if (!tback)
5962                 goto full_backref;
5963 normal:
5964         *flags = 0;
5965         if (rec->flag_block_full_backref != FLAG_UNSET &&
5966             rec->flag_block_full_backref != 0)
5967                 rec->bad_full_backref = 1;
5968         return 0;
5969 full_backref:
5970         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5971         if (rec->flag_block_full_backref != FLAG_UNSET &&
5972             rec->flag_block_full_backref != 1)
5973                 rec->bad_full_backref = 1;
5974         return 0;
5975 }
5976
5977 static int run_next_block(struct btrfs_root *root,
5978                           struct block_info *bits,
5979                           int bits_nr,
5980                           u64 *last,
5981                           struct cache_tree *pending,
5982                           struct cache_tree *seen,
5983                           struct cache_tree *reada,
5984                           struct cache_tree *nodes,
5985                           struct cache_tree *extent_cache,
5986                           struct cache_tree *chunk_cache,
5987                           struct rb_root *dev_cache,
5988                           struct block_group_tree *block_group_cache,
5989                           struct device_extent_tree *dev_extent_cache,
5990                           struct root_item_record *ri)
5991 {
5992         struct extent_buffer *buf;
5993         struct extent_record *rec = NULL;
5994         u64 bytenr;
5995         u32 size;
5996         u64 parent;
5997         u64 owner;
5998         u64 flags;
5999         u64 ptr;
6000         u64 gen = 0;
6001         int ret = 0;
6002         int i;
6003         int nritems;
6004         struct btrfs_key key;
6005         struct cache_extent *cache;
6006         int reada_bits;
6007
6008         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6009                                     bits_nr, &reada_bits);
6010         if (nritems == 0)
6011                 return 1;
6012
6013         if (!reada_bits) {
6014                 for(i = 0; i < nritems; i++) {
6015                         ret = add_cache_extent(reada, bits[i].start,
6016                                                bits[i].size);
6017                         if (ret == -EEXIST)
6018                                 continue;
6019
6020                         /* fixme, get the parent transid */
6021                         readahead_tree_block(root, bits[i].start,
6022                                              bits[i].size, 0);
6023                 }
6024         }
6025         *last = bits[0].start;
6026         bytenr = bits[0].start;
6027         size = bits[0].size;
6028
6029         cache = lookup_cache_extent(pending, bytenr, size);
6030         if (cache) {
6031                 remove_cache_extent(pending, cache);
6032                 free(cache);
6033         }
6034         cache = lookup_cache_extent(reada, bytenr, size);
6035         if (cache) {
6036                 remove_cache_extent(reada, cache);
6037                 free(cache);
6038         }
6039         cache = lookup_cache_extent(nodes, bytenr, size);
6040         if (cache) {
6041                 remove_cache_extent(nodes, cache);
6042                 free(cache);
6043         }
6044         cache = lookup_cache_extent(extent_cache, bytenr, size);
6045         if (cache) {
6046                 rec = container_of(cache, struct extent_record, cache);
6047                 gen = rec->parent_generation;
6048         }
6049
6050         /* fixme, get the real parent transid */
6051         buf = read_tree_block(root, bytenr, size, gen);
6052         if (!extent_buffer_uptodate(buf)) {
6053                 record_bad_block_io(root->fs_info,
6054                                     extent_cache, bytenr, size);
6055                 goto out;
6056         }
6057
6058         nritems = btrfs_header_nritems(buf);
6059
6060         flags = 0;
6061         if (!init_extent_tree) {
6062                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6063                                        btrfs_header_level(buf), 1, NULL,
6064                                        &flags);
6065                 if (ret < 0) {
6066                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6067                         if (ret < 0) {
6068                                 fprintf(stderr, "Couldn't calc extent flags\n");
6069                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6070                         }
6071                 }
6072         } else {
6073                 flags = 0;
6074                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6075                 if (ret < 0) {
6076                         fprintf(stderr, "Couldn't calc extent flags\n");
6077                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6078                 }
6079         }
6080
6081         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6082                 if (ri != NULL &&
6083                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6084                     ri->objectid == btrfs_header_owner(buf)) {
6085                         /*
6086                          * Ok we got to this block from it's original owner and
6087                          * we have FULL_BACKREF set.  Relocation can leave
6088                          * converted blocks over so this is altogether possible,
6089                          * however it's not possible if the generation > the
6090                          * last snapshot, so check for this case.
6091                          */
6092                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6093                             btrfs_header_generation(buf) > ri->last_snapshot) {
6094                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6095                                 rec->bad_full_backref = 1;
6096                         }
6097                 }
6098         } else {
6099                 if (ri != NULL &&
6100                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6101                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6102                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6103                         rec->bad_full_backref = 1;
6104                 }
6105         }
6106
6107         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6108                 rec->flag_block_full_backref = 1;
6109                 parent = bytenr;
6110                 owner = 0;
6111         } else {
6112                 rec->flag_block_full_backref = 0;
6113                 parent = 0;
6114                 owner = btrfs_header_owner(buf);
6115         }
6116
6117         ret = check_block(root, extent_cache, buf, flags);
6118         if (ret)
6119                 goto out;
6120
6121         if (btrfs_is_leaf(buf)) {
6122                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6123                 for (i = 0; i < nritems; i++) {
6124                         struct btrfs_file_extent_item *fi;
6125                         btrfs_item_key_to_cpu(buf, &key, i);
6126                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6127                                 process_extent_item(root, extent_cache, buf,
6128                                                     i);
6129                                 continue;
6130                         }
6131                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6132                                 process_extent_item(root, extent_cache, buf,
6133                                                     i);
6134                                 continue;
6135                         }
6136                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6137                                 total_csum_bytes +=
6138                                         btrfs_item_size_nr(buf, i);
6139                                 continue;
6140                         }
6141                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6142                                 process_chunk_item(chunk_cache, &key, buf, i);
6143                                 continue;
6144                         }
6145                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6146                                 process_device_item(dev_cache, &key, buf, i);
6147                                 continue;
6148                         }
6149                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6150                                 process_block_group_item(block_group_cache,
6151                                         &key, buf, i);
6152                                 continue;
6153                         }
6154                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6155                                 process_device_extent_item(dev_extent_cache,
6156                                         &key, buf, i);
6157                                 continue;
6158
6159                         }
6160                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6161 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6162                                 process_extent_ref_v0(extent_cache, buf, i);
6163 #else
6164                                 BUG();
6165 #endif
6166                                 continue;
6167                         }
6168
6169                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6170                                 add_tree_backref(extent_cache, key.objectid, 0,
6171                                                  key.offset, 0);
6172                                 continue;
6173                         }
6174                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6175                                 add_tree_backref(extent_cache, key.objectid,
6176                                                  key.offset, 0, 0);
6177                                 continue;
6178                         }
6179                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6180                                 struct btrfs_extent_data_ref *ref;
6181                                 ref = btrfs_item_ptr(buf, i,
6182                                                 struct btrfs_extent_data_ref);
6183                                 add_data_backref(extent_cache,
6184                                         key.objectid, 0,
6185                                         btrfs_extent_data_ref_root(buf, ref),
6186                                         btrfs_extent_data_ref_objectid(buf,
6187                                                                        ref),
6188                                         btrfs_extent_data_ref_offset(buf, ref),
6189                                         btrfs_extent_data_ref_count(buf, ref),
6190                                         0, root->sectorsize);
6191                                 continue;
6192                         }
6193                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6194                                 struct btrfs_shared_data_ref *ref;
6195                                 ref = btrfs_item_ptr(buf, i,
6196                                                 struct btrfs_shared_data_ref);
6197                                 add_data_backref(extent_cache,
6198                                         key.objectid, key.offset, 0, 0, 0,
6199                                         btrfs_shared_data_ref_count(buf, ref),
6200                                         0, root->sectorsize);
6201                                 continue;
6202                         }
6203                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6204                                 struct bad_item *bad;
6205
6206                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6207                                         continue;
6208                                 if (!owner)
6209                                         continue;
6210                                 bad = malloc(sizeof(struct bad_item));
6211                                 if (!bad)
6212                                         continue;
6213                                 INIT_LIST_HEAD(&bad->list);
6214                                 memcpy(&bad->key, &key,
6215                                        sizeof(struct btrfs_key));
6216                                 bad->root_id = owner;
6217                                 list_add_tail(&bad->list, &delete_items);
6218                                 continue;
6219                         }
6220                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6221                                 continue;
6222                         fi = btrfs_item_ptr(buf, i,
6223                                             struct btrfs_file_extent_item);
6224                         if (btrfs_file_extent_type(buf, fi) ==
6225                             BTRFS_FILE_EXTENT_INLINE)
6226                                 continue;
6227                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6228                                 continue;
6229
6230                         data_bytes_allocated +=
6231                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6232                         if (data_bytes_allocated < root->sectorsize) {
6233                                 abort();
6234                         }
6235                         data_bytes_referenced +=
6236                                 btrfs_file_extent_num_bytes(buf, fi);
6237                         add_data_backref(extent_cache,
6238                                 btrfs_file_extent_disk_bytenr(buf, fi),
6239                                 parent, owner, key.objectid, key.offset -
6240                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6241                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6242                 }
6243         } else {
6244                 int level;
6245                 struct btrfs_key first_key;
6246
6247                 first_key.objectid = 0;
6248
6249                 if (nritems > 0)
6250                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6251                 level = btrfs_header_level(buf);
6252                 for (i = 0; i < nritems; i++) {
6253                         struct extent_record tmpl;
6254
6255                         ptr = btrfs_node_blockptr(buf, i);
6256                         size = root->nodesize;
6257                         btrfs_node_key_to_cpu(buf, &key, i);
6258                         if (ri != NULL) {
6259                                 if ((level == ri->drop_level)
6260                                     && is_dropped_key(&key, &ri->drop_key)) {
6261                                         continue;
6262                                 }
6263                         }
6264
6265                         memset(&tmpl, 0, sizeof(tmpl));
6266                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6267                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6268                         tmpl.start = ptr;
6269                         tmpl.nr = size;
6270                         tmpl.refs = 1;
6271                         tmpl.metadata = 1;
6272                         tmpl.max_size = size;
6273                         ret = add_extent_rec(extent_cache, &tmpl);
6274                         BUG_ON(ret);
6275
6276                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6277
6278                         if (level > 1) {
6279                                 add_pending(nodes, seen, ptr, size);
6280                         } else {
6281                                 add_pending(pending, seen, ptr, size);
6282                         }
6283                 }
6284                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6285                                       nritems) * sizeof(struct btrfs_key_ptr);
6286         }
6287         total_btree_bytes += buf->len;
6288         if (fs_root_objectid(btrfs_header_owner(buf)))
6289                 total_fs_tree_bytes += buf->len;
6290         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6291                 total_extent_tree_bytes += buf->len;
6292         if (!found_old_backref &&
6293             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6294             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6295             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6296                 found_old_backref = 1;
6297 out:
6298         free_extent_buffer(buf);
6299         return ret;
6300 }
6301
6302 static int add_root_to_pending(struct extent_buffer *buf,
6303                                struct cache_tree *extent_cache,
6304                                struct cache_tree *pending,
6305                                struct cache_tree *seen,
6306                                struct cache_tree *nodes,
6307                                u64 objectid)
6308 {
6309         struct extent_record tmpl;
6310
6311         if (btrfs_header_level(buf) > 0)
6312                 add_pending(nodes, seen, buf->start, buf->len);
6313         else
6314                 add_pending(pending, seen, buf->start, buf->len);
6315
6316         memset(&tmpl, 0, sizeof(tmpl));
6317         tmpl.start = buf->start;
6318         tmpl.nr = buf->len;
6319         tmpl.is_root = 1;
6320         tmpl.refs = 1;
6321         tmpl.metadata = 1;
6322         tmpl.max_size = buf->len;
6323         add_extent_rec(extent_cache, &tmpl);
6324
6325         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6326             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6327                 add_tree_backref(extent_cache, buf->start, buf->start,
6328                                  0, 1);
6329         else
6330                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6331         return 0;
6332 }
6333
6334 /* as we fix the tree, we might be deleting blocks that
6335  * we're tracking for repair.  This hook makes sure we
6336  * remove any backrefs for blocks as we are fixing them.
6337  */
6338 static int free_extent_hook(struct btrfs_trans_handle *trans,
6339                             struct btrfs_root *root,
6340                             u64 bytenr, u64 num_bytes, u64 parent,
6341                             u64 root_objectid, u64 owner, u64 offset,
6342                             int refs_to_drop)
6343 {
6344         struct extent_record *rec;
6345         struct cache_extent *cache;
6346         int is_data;
6347         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6348
6349         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6350         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6351         if (!cache)
6352                 return 0;
6353
6354         rec = container_of(cache, struct extent_record, cache);
6355         if (is_data) {
6356                 struct data_backref *back;
6357                 back = find_data_backref(rec, parent, root_objectid, owner,
6358                                          offset, 1, bytenr, num_bytes);
6359                 if (!back)
6360                         goto out;
6361                 if (back->node.found_ref) {
6362                         back->found_ref -= refs_to_drop;
6363                         if (rec->refs)
6364                                 rec->refs -= refs_to_drop;
6365                 }
6366                 if (back->node.found_extent_tree) {
6367                         back->num_refs -= refs_to_drop;
6368                         if (rec->extent_item_refs)
6369                                 rec->extent_item_refs -= refs_to_drop;
6370                 }
6371                 if (back->found_ref == 0)
6372                         back->node.found_ref = 0;
6373                 if (back->num_refs == 0)
6374                         back->node.found_extent_tree = 0;
6375
6376                 if (!back->node.found_extent_tree && back->node.found_ref) {
6377                         list_del(&back->node.list);
6378                         free(back);
6379                 }
6380         } else {
6381                 struct tree_backref *back;
6382                 back = find_tree_backref(rec, parent, root_objectid);
6383                 if (!back)
6384                         goto out;
6385                 if (back->node.found_ref) {
6386                         if (rec->refs)
6387                                 rec->refs--;
6388                         back->node.found_ref = 0;
6389                 }
6390                 if (back->node.found_extent_tree) {
6391                         if (rec->extent_item_refs)
6392                                 rec->extent_item_refs--;
6393                         back->node.found_extent_tree = 0;
6394                 }
6395                 if (!back->node.found_extent_tree && back->node.found_ref) {
6396                         list_del(&back->node.list);
6397                         free(back);
6398                 }
6399         }
6400         maybe_free_extent_rec(extent_cache, rec);
6401 out:
6402         return 0;
6403 }
6404
6405 static int delete_extent_records(struct btrfs_trans_handle *trans,
6406                                  struct btrfs_root *root,
6407                                  struct btrfs_path *path,
6408                                  u64 bytenr, u64 new_len)
6409 {
6410         struct btrfs_key key;
6411         struct btrfs_key found_key;
6412         struct extent_buffer *leaf;
6413         int ret;
6414         int slot;
6415
6416
6417         key.objectid = bytenr;
6418         key.type = (u8)-1;
6419         key.offset = (u64)-1;
6420
6421         while(1) {
6422                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6423                                         &key, path, 0, 1);
6424                 if (ret < 0)
6425                         break;
6426
6427                 if (ret > 0) {
6428                         ret = 0;
6429                         if (path->slots[0] == 0)
6430                                 break;
6431                         path->slots[0]--;
6432                 }
6433                 ret = 0;
6434
6435                 leaf = path->nodes[0];
6436                 slot = path->slots[0];
6437
6438                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6439                 if (found_key.objectid != bytenr)
6440                         break;
6441
6442                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6443                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6444                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6445                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6446                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6447                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6448                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6449                         btrfs_release_path(path);
6450                         if (found_key.type == 0) {
6451                                 if (found_key.offset == 0)
6452                                         break;
6453                                 key.offset = found_key.offset - 1;
6454                                 key.type = found_key.type;
6455                         }
6456                         key.type = found_key.type - 1;
6457                         key.offset = (u64)-1;
6458                         continue;
6459                 }
6460
6461                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6462                         found_key.objectid, found_key.type, found_key.offset);
6463
6464                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6465                 if (ret)
6466                         break;
6467                 btrfs_release_path(path);
6468
6469                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6470                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6471                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6472                                 found_key.offset : root->nodesize;
6473
6474                         ret = btrfs_update_block_group(trans, root, bytenr,
6475                                                        bytes, 0, 0);
6476                         if (ret)
6477                                 break;
6478                 }
6479         }
6480
6481         btrfs_release_path(path);
6482         return ret;
6483 }
6484
6485 /*
6486  * for a single backref, this will allocate a new extent
6487  * and add the backref to it.
6488  */
6489 static int record_extent(struct btrfs_trans_handle *trans,
6490                          struct btrfs_fs_info *info,
6491                          struct btrfs_path *path,
6492                          struct extent_record *rec,
6493                          struct extent_backref *back,
6494                          int allocated, u64 flags)
6495 {
6496         int ret;
6497         struct btrfs_root *extent_root = info->extent_root;
6498         struct extent_buffer *leaf;
6499         struct btrfs_key ins_key;
6500         struct btrfs_extent_item *ei;
6501         struct tree_backref *tback;
6502         struct data_backref *dback;
6503         struct btrfs_tree_block_info *bi;
6504
6505         if (!back->is_data)
6506                 rec->max_size = max_t(u64, rec->max_size,
6507                                     info->extent_root->nodesize);
6508
6509         if (!allocated) {
6510                 u32 item_size = sizeof(*ei);
6511
6512                 if (!back->is_data)
6513                         item_size += sizeof(*bi);
6514
6515                 ins_key.objectid = rec->start;
6516                 ins_key.offset = rec->max_size;
6517                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6518
6519                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6520                                         &ins_key, item_size);
6521                 if (ret)
6522                         goto fail;
6523
6524                 leaf = path->nodes[0];
6525                 ei = btrfs_item_ptr(leaf, path->slots[0],
6526                                     struct btrfs_extent_item);
6527
6528                 btrfs_set_extent_refs(leaf, ei, 0);
6529                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6530
6531                 if (back->is_data) {
6532                         btrfs_set_extent_flags(leaf, ei,
6533                                                BTRFS_EXTENT_FLAG_DATA);
6534                 } else {
6535                         struct btrfs_disk_key copy_key;;
6536
6537                         tback = to_tree_backref(back);
6538                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6539                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6540                                              sizeof(*bi));
6541
6542                         btrfs_set_disk_key_objectid(&copy_key,
6543                                                     rec->info_objectid);
6544                         btrfs_set_disk_key_type(&copy_key, 0);
6545                         btrfs_set_disk_key_offset(&copy_key, 0);
6546
6547                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6548                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6549
6550                         btrfs_set_extent_flags(leaf, ei,
6551                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6552                 }
6553
6554                 btrfs_mark_buffer_dirty(leaf);
6555                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6556                                                rec->max_size, 1, 0);
6557                 if (ret)
6558                         goto fail;
6559                 btrfs_release_path(path);
6560         }
6561
6562         if (back->is_data) {
6563                 u64 parent;
6564                 int i;
6565
6566                 dback = to_data_backref(back);
6567                 if (back->full_backref)
6568                         parent = dback->parent;
6569                 else
6570                         parent = 0;
6571
6572                 for (i = 0; i < dback->found_ref; i++) {
6573                         /* if parent != 0, we're doing a full backref
6574                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6575                          * just makes the backref allocator create a data
6576                          * backref
6577                          */
6578                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6579                                                    rec->start, rec->max_size,
6580                                                    parent,
6581                                                    dback->root,
6582                                                    parent ?
6583                                                    BTRFS_FIRST_FREE_OBJECTID :
6584                                                    dback->owner,
6585                                                    dback->offset);
6586                         if (ret)
6587                                 break;
6588                 }
6589                 fprintf(stderr, "adding new data backref"
6590                                 " on %llu %s %llu owner %llu"
6591                                 " offset %llu found %d\n",
6592                                 (unsigned long long)rec->start,
6593                                 back->full_backref ?
6594                                 "parent" : "root",
6595                                 back->full_backref ?
6596                                 (unsigned long long)parent :
6597                                 (unsigned long long)dback->root,
6598                                 (unsigned long long)dback->owner,
6599                                 (unsigned long long)dback->offset,
6600                                 dback->found_ref);
6601         } else {
6602                 u64 parent;
6603
6604                 tback = to_tree_backref(back);
6605                 if (back->full_backref)
6606                         parent = tback->parent;
6607                 else
6608                         parent = 0;
6609
6610                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6611                                            rec->start, rec->max_size,
6612                                            parent, tback->root, 0, 0);
6613                 fprintf(stderr, "adding new tree backref on "
6614                         "start %llu len %llu parent %llu root %llu\n",
6615                         rec->start, rec->max_size, parent, tback->root);
6616         }
6617 fail:
6618         btrfs_release_path(path);
6619         return ret;
6620 }
6621
6622 static struct extent_entry *find_entry(struct list_head *entries,
6623                                        u64 bytenr, u64 bytes)
6624 {
6625         struct extent_entry *entry = NULL;
6626
6627         list_for_each_entry(entry, entries, list) {
6628                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6629                         return entry;
6630         }
6631
6632         return NULL;
6633 }
6634
6635 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6636 {
6637         struct extent_entry *entry, *best = NULL, *prev = NULL;
6638
6639         list_for_each_entry(entry, entries, list) {
6640                 if (!prev) {
6641                         prev = entry;
6642                         continue;
6643                 }
6644
6645                 /*
6646                  * If there are as many broken entries as entries then we know
6647                  * not to trust this particular entry.
6648                  */
6649                 if (entry->broken == entry->count)
6650                         continue;
6651
6652                 /*
6653                  * If our current entry == best then we can't be sure our best
6654                  * is really the best, so we need to keep searching.
6655                  */
6656                 if (best && best->count == entry->count) {
6657                         prev = entry;
6658                         best = NULL;
6659                         continue;
6660                 }
6661
6662                 /* Prev == entry, not good enough, have to keep searching */
6663                 if (!prev->broken && prev->count == entry->count)
6664                         continue;
6665
6666                 if (!best)
6667                         best = (prev->count > entry->count) ? prev : entry;
6668                 else if (best->count < entry->count)
6669                         best = entry;
6670                 prev = entry;
6671         }
6672
6673         return best;
6674 }
6675
6676 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6677                       struct data_backref *dback, struct extent_entry *entry)
6678 {
6679         struct btrfs_trans_handle *trans;
6680         struct btrfs_root *root;
6681         struct btrfs_file_extent_item *fi;
6682         struct extent_buffer *leaf;
6683         struct btrfs_key key;
6684         u64 bytenr, bytes;
6685         int ret, err;
6686
6687         key.objectid = dback->root;
6688         key.type = BTRFS_ROOT_ITEM_KEY;
6689         key.offset = (u64)-1;
6690         root = btrfs_read_fs_root(info, &key);
6691         if (IS_ERR(root)) {
6692                 fprintf(stderr, "Couldn't find root for our ref\n");
6693                 return -EINVAL;
6694         }
6695
6696         /*
6697          * The backref points to the original offset of the extent if it was
6698          * split, so we need to search down to the offset we have and then walk
6699          * forward until we find the backref we're looking for.
6700          */
6701         key.objectid = dback->owner;
6702         key.type = BTRFS_EXTENT_DATA_KEY;
6703         key.offset = dback->offset;
6704         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6705         if (ret < 0) {
6706                 fprintf(stderr, "Error looking up ref %d\n", ret);
6707                 return ret;
6708         }
6709
6710         while (1) {
6711                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6712                         ret = btrfs_next_leaf(root, path);
6713                         if (ret) {
6714                                 fprintf(stderr, "Couldn't find our ref, next\n");
6715                                 return -EINVAL;
6716                         }
6717                 }
6718                 leaf = path->nodes[0];
6719                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6720                 if (key.objectid != dback->owner ||
6721                     key.type != BTRFS_EXTENT_DATA_KEY) {
6722                         fprintf(stderr, "Couldn't find our ref, search\n");
6723                         return -EINVAL;
6724                 }
6725                 fi = btrfs_item_ptr(leaf, path->slots[0],
6726                                     struct btrfs_file_extent_item);
6727                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6728                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6729
6730                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6731                         break;
6732                 path->slots[0]++;
6733         }
6734
6735         btrfs_release_path(path);
6736
6737         trans = btrfs_start_transaction(root, 1);
6738         if (IS_ERR(trans))
6739                 return PTR_ERR(trans);
6740
6741         /*
6742          * Ok we have the key of the file extent we want to fix, now we can cow
6743          * down to the thing and fix it.
6744          */
6745         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6746         if (ret < 0) {
6747                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6748                         key.objectid, key.type, key.offset, ret);
6749                 goto out;
6750         }
6751         if (ret > 0) {
6752                 fprintf(stderr, "Well that's odd, we just found this key "
6753                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6754                         key.offset);
6755                 ret = -EINVAL;
6756                 goto out;
6757         }
6758         leaf = path->nodes[0];
6759         fi = btrfs_item_ptr(leaf, path->slots[0],
6760                             struct btrfs_file_extent_item);
6761
6762         if (btrfs_file_extent_compression(leaf, fi) &&
6763             dback->disk_bytenr != entry->bytenr) {
6764                 fprintf(stderr, "Ref doesn't match the record start and is "
6765                         "compressed, please take a btrfs-image of this file "
6766                         "system and send it to a btrfs developer so they can "
6767                         "complete this functionality for bytenr %Lu\n",
6768                         dback->disk_bytenr);
6769                 ret = -EINVAL;
6770                 goto out;
6771         }
6772
6773         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6774                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6775         } else if (dback->disk_bytenr > entry->bytenr) {
6776                 u64 off_diff, offset;
6777
6778                 off_diff = dback->disk_bytenr - entry->bytenr;
6779                 offset = btrfs_file_extent_offset(leaf, fi);
6780                 if (dback->disk_bytenr + offset +
6781                     btrfs_file_extent_num_bytes(leaf, fi) >
6782                     entry->bytenr + entry->bytes) {
6783                         fprintf(stderr, "Ref is past the entry end, please "
6784                                 "take a btrfs-image of this file system and "
6785                                 "send it to a btrfs developer, ref %Lu\n",
6786                                 dback->disk_bytenr);
6787                         ret = -EINVAL;
6788                         goto out;
6789                 }
6790                 offset += off_diff;
6791                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6792                 btrfs_set_file_extent_offset(leaf, fi, offset);
6793         } else if (dback->disk_bytenr < entry->bytenr) {
6794                 u64 offset;
6795
6796                 offset = btrfs_file_extent_offset(leaf, fi);
6797                 if (dback->disk_bytenr + offset < entry->bytenr) {
6798                         fprintf(stderr, "Ref is before the entry start, please"
6799                                 " take a btrfs-image of this file system and "
6800                                 "send it to a btrfs developer, ref %Lu\n",
6801                                 dback->disk_bytenr);
6802                         ret = -EINVAL;
6803                         goto out;
6804                 }
6805
6806                 offset += dback->disk_bytenr;
6807                 offset -= entry->bytenr;
6808                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6809                 btrfs_set_file_extent_offset(leaf, fi, offset);
6810         }
6811
6812         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6813
6814         /*
6815          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6816          * only do this if we aren't using compression, otherwise it's a
6817          * trickier case.
6818          */
6819         if (!btrfs_file_extent_compression(leaf, fi))
6820                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6821         else
6822                 printf("ram bytes may be wrong?\n");
6823         btrfs_mark_buffer_dirty(leaf);
6824 out:
6825         err = btrfs_commit_transaction(trans, root);
6826         btrfs_release_path(path);
6827         return ret ? ret : err;
6828 }
6829
6830 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6831                            struct extent_record *rec)
6832 {
6833         struct extent_backref *back;
6834         struct data_backref *dback;
6835         struct extent_entry *entry, *best = NULL;
6836         LIST_HEAD(entries);
6837         int nr_entries = 0;
6838         int broken_entries = 0;
6839         int ret = 0;
6840         short mismatch = 0;
6841
6842         /*
6843          * Metadata is easy and the backrefs should always agree on bytenr and
6844          * size, if not we've got bigger issues.
6845          */
6846         if (rec->metadata)
6847                 return 0;
6848
6849         list_for_each_entry(back, &rec->backrefs, list) {
6850                 if (back->full_backref || !back->is_data)
6851                         continue;
6852
6853                 dback = to_data_backref(back);
6854
6855                 /*
6856                  * We only pay attention to backrefs that we found a real
6857                  * backref for.
6858                  */
6859                 if (dback->found_ref == 0)
6860                         continue;
6861
6862                 /*
6863                  * For now we only catch when the bytes don't match, not the
6864                  * bytenr.  We can easily do this at the same time, but I want
6865                  * to have a fs image to test on before we just add repair
6866                  * functionality willy-nilly so we know we won't screw up the
6867                  * repair.
6868                  */
6869
6870                 entry = find_entry(&entries, dback->disk_bytenr,
6871                                    dback->bytes);
6872                 if (!entry) {
6873                         entry = malloc(sizeof(struct extent_entry));
6874                         if (!entry) {
6875                                 ret = -ENOMEM;
6876                                 goto out;
6877                         }
6878                         memset(entry, 0, sizeof(*entry));
6879                         entry->bytenr = dback->disk_bytenr;
6880                         entry->bytes = dback->bytes;
6881                         list_add_tail(&entry->list, &entries);
6882                         nr_entries++;
6883                 }
6884
6885                 /*
6886                  * If we only have on entry we may think the entries agree when
6887                  * in reality they don't so we have to do some extra checking.
6888                  */
6889                 if (dback->disk_bytenr != rec->start ||
6890                     dback->bytes != rec->nr || back->broken)
6891                         mismatch = 1;
6892
6893                 if (back->broken) {
6894                         entry->broken++;
6895                         broken_entries++;
6896                 }
6897
6898                 entry->count++;
6899         }
6900
6901         /* Yay all the backrefs agree, carry on good sir */
6902         if (nr_entries <= 1 && !mismatch)
6903                 goto out;
6904
6905         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6906                 "%Lu\n", rec->start);
6907
6908         /*
6909          * First we want to see if the backrefs can agree amongst themselves who
6910          * is right, so figure out which one of the entries has the highest
6911          * count.
6912          */
6913         best = find_most_right_entry(&entries);
6914
6915         /*
6916          * Ok so we may have an even split between what the backrefs think, so
6917          * this is where we use the extent ref to see what it thinks.
6918          */
6919         if (!best) {
6920                 entry = find_entry(&entries, rec->start, rec->nr);
6921                 if (!entry && (!broken_entries || !rec->found_rec)) {
6922                         fprintf(stderr, "Backrefs don't agree with each other "
6923                                 "and extent record doesn't agree with anybody,"
6924                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6925                                 rec->start, rec->nr);
6926                         ret = -EINVAL;
6927                         goto out;
6928                 } else if (!entry) {
6929                         /*
6930                          * Ok our backrefs were broken, we'll assume this is the
6931                          * correct value and add an entry for this range.
6932                          */
6933                         entry = malloc(sizeof(struct extent_entry));
6934                         if (!entry) {
6935                                 ret = -ENOMEM;
6936                                 goto out;
6937                         }
6938                         memset(entry, 0, sizeof(*entry));
6939                         entry->bytenr = rec->start;
6940                         entry->bytes = rec->nr;
6941                         list_add_tail(&entry->list, &entries);
6942                         nr_entries++;
6943                 }
6944                 entry->count++;
6945                 best = find_most_right_entry(&entries);
6946                 if (!best) {
6947                         fprintf(stderr, "Backrefs and extent record evenly "
6948                                 "split on who is right, this is going to "
6949                                 "require user input to fix bytenr %Lu bytes "
6950                                 "%Lu\n", rec->start, rec->nr);
6951                         ret = -EINVAL;
6952                         goto out;
6953                 }
6954         }
6955
6956         /*
6957          * I don't think this can happen currently as we'll abort() if we catch
6958          * this case higher up, but in case somebody removes that we still can't
6959          * deal with it properly here yet, so just bail out of that's the case.
6960          */
6961         if (best->bytenr != rec->start) {
6962                 fprintf(stderr, "Extent start and backref starts don't match, "
6963                         "please use btrfs-image on this file system and send "
6964                         "it to a btrfs developer so they can make fsck fix "
6965                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6966                         rec->start, rec->nr);
6967                 ret = -EINVAL;
6968                 goto out;
6969         }
6970
6971         /*
6972          * Ok great we all agreed on an extent record, let's go find the real
6973          * references and fix up the ones that don't match.
6974          */
6975         list_for_each_entry(back, &rec->backrefs, list) {
6976                 if (back->full_backref || !back->is_data)
6977                         continue;
6978
6979                 dback = to_data_backref(back);
6980
6981                 /*
6982                  * Still ignoring backrefs that don't have a real ref attached
6983                  * to them.
6984                  */
6985                 if (dback->found_ref == 0)
6986                         continue;
6987
6988                 if (dback->bytes == best->bytes &&
6989                     dback->disk_bytenr == best->bytenr)
6990                         continue;
6991
6992                 ret = repair_ref(info, path, dback, best);
6993                 if (ret)
6994                         goto out;
6995         }
6996
6997         /*
6998          * Ok we messed with the actual refs, which means we need to drop our
6999          * entire cache and go back and rescan.  I know this is a huge pain and
7000          * adds a lot of extra work, but it's the only way to be safe.  Once all
7001          * the backrefs agree we may not need to do anything to the extent
7002          * record itself.
7003          */
7004         ret = -EAGAIN;
7005 out:
7006         while (!list_empty(&entries)) {
7007                 entry = list_entry(entries.next, struct extent_entry, list);
7008                 list_del_init(&entry->list);
7009                 free(entry);
7010         }
7011         return ret;
7012 }
7013
7014 static int process_duplicates(struct btrfs_root *root,
7015                               struct cache_tree *extent_cache,
7016                               struct extent_record *rec)
7017 {
7018         struct extent_record *good, *tmp;
7019         struct cache_extent *cache;
7020         int ret;
7021
7022         /*
7023          * If we found a extent record for this extent then return, or if we
7024          * have more than one duplicate we are likely going to need to delete
7025          * something.
7026          */
7027         if (rec->found_rec || rec->num_duplicates > 1)
7028                 return 0;
7029
7030         /* Shouldn't happen but just in case */
7031         BUG_ON(!rec->num_duplicates);
7032
7033         /*
7034          * So this happens if we end up with a backref that doesn't match the
7035          * actual extent entry.  So either the backref is bad or the extent
7036          * entry is bad.  Either way we want to have the extent_record actually
7037          * reflect what we found in the extent_tree, so we need to take the
7038          * duplicate out and use that as the extent_record since the only way we
7039          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7040          */
7041         remove_cache_extent(extent_cache, &rec->cache);
7042
7043         good = to_extent_record(rec->dups.next);
7044         list_del_init(&good->list);
7045         INIT_LIST_HEAD(&good->backrefs);
7046         INIT_LIST_HEAD(&good->dups);
7047         good->cache.start = good->start;
7048         good->cache.size = good->nr;
7049         good->content_checked = 0;
7050         good->owner_ref_checked = 0;
7051         good->num_duplicates = 0;
7052         good->refs = rec->refs;
7053         list_splice_init(&rec->backrefs, &good->backrefs);
7054         while (1) {
7055                 cache = lookup_cache_extent(extent_cache, good->start,
7056                                             good->nr);
7057                 if (!cache)
7058                         break;
7059                 tmp = container_of(cache, struct extent_record, cache);
7060
7061                 /*
7062                  * If we find another overlapping extent and it's found_rec is
7063                  * set then it's a duplicate and we need to try and delete
7064                  * something.
7065                  */
7066                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7067                         if (list_empty(&good->list))
7068                                 list_add_tail(&good->list,
7069                                               &duplicate_extents);
7070                         good->num_duplicates += tmp->num_duplicates + 1;
7071                         list_splice_init(&tmp->dups, &good->dups);
7072                         list_del_init(&tmp->list);
7073                         list_add_tail(&tmp->list, &good->dups);
7074                         remove_cache_extent(extent_cache, &tmp->cache);
7075                         continue;
7076                 }
7077
7078                 /*
7079                  * Ok we have another non extent item backed extent rec, so lets
7080                  * just add it to this extent and carry on like we did above.
7081                  */
7082                 good->refs += tmp->refs;
7083                 list_splice_init(&tmp->backrefs, &good->backrefs);
7084                 remove_cache_extent(extent_cache, &tmp->cache);
7085                 free(tmp);
7086         }
7087         ret = insert_cache_extent(extent_cache, &good->cache);
7088         BUG_ON(ret);
7089         free(rec);
7090         return good->num_duplicates ? 0 : 1;
7091 }
7092
7093 static int delete_duplicate_records(struct btrfs_root *root,
7094                                     struct extent_record *rec)
7095 {
7096         struct btrfs_trans_handle *trans;
7097         LIST_HEAD(delete_list);
7098         struct btrfs_path *path;
7099         struct extent_record *tmp, *good, *n;
7100         int nr_del = 0;
7101         int ret = 0, err;
7102         struct btrfs_key key;
7103
7104         path = btrfs_alloc_path();
7105         if (!path) {
7106                 ret = -ENOMEM;
7107                 goto out;
7108         }
7109
7110         good = rec;
7111         /* Find the record that covers all of the duplicates. */
7112         list_for_each_entry(tmp, &rec->dups, list) {
7113                 if (good->start < tmp->start)
7114                         continue;
7115                 if (good->nr > tmp->nr)
7116                         continue;
7117
7118                 if (tmp->start + tmp->nr < good->start + good->nr) {
7119                         fprintf(stderr, "Ok we have overlapping extents that "
7120                                 "aren't completely covered by each other, this "
7121                                 "is going to require more careful thought.  "
7122                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7123                                 tmp->start, tmp->nr, good->start, good->nr);
7124                         abort();
7125                 }
7126                 good = tmp;
7127         }
7128
7129         if (good != rec)
7130                 list_add_tail(&rec->list, &delete_list);
7131
7132         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7133                 if (tmp == good)
7134                         continue;
7135                 list_move_tail(&tmp->list, &delete_list);
7136         }
7137
7138         root = root->fs_info->extent_root;
7139         trans = btrfs_start_transaction(root, 1);
7140         if (IS_ERR(trans)) {
7141                 ret = PTR_ERR(trans);
7142                 goto out;
7143         }
7144
7145         list_for_each_entry(tmp, &delete_list, list) {
7146                 if (tmp->found_rec == 0)
7147                         continue;
7148                 key.objectid = tmp->start;
7149                 key.type = BTRFS_EXTENT_ITEM_KEY;
7150                 key.offset = tmp->nr;
7151
7152                 /* Shouldn't happen but just in case */
7153                 if (tmp->metadata) {
7154                         fprintf(stderr, "Well this shouldn't happen, extent "
7155                                 "record overlaps but is metadata? "
7156                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7157                         abort();
7158                 }
7159
7160                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7161                 if (ret) {
7162                         if (ret > 0)
7163                                 ret = -EINVAL;
7164                         break;
7165                 }
7166                 ret = btrfs_del_item(trans, root, path);
7167                 if (ret)
7168                         break;
7169                 btrfs_release_path(path);
7170                 nr_del++;
7171         }
7172         err = btrfs_commit_transaction(trans, root);
7173         if (err && !ret)
7174                 ret = err;
7175 out:
7176         while (!list_empty(&delete_list)) {
7177                 tmp = to_extent_record(delete_list.next);
7178                 list_del_init(&tmp->list);
7179                 if (tmp == rec)
7180                         continue;
7181                 free(tmp);
7182         }
7183
7184         while (!list_empty(&rec->dups)) {
7185                 tmp = to_extent_record(rec->dups.next);
7186                 list_del_init(&tmp->list);
7187                 free(tmp);
7188         }
7189
7190         btrfs_free_path(path);
7191
7192         if (!ret && !nr_del)
7193                 rec->num_duplicates = 0;
7194
7195         return ret ? ret : nr_del;
7196 }
7197
7198 static int find_possible_backrefs(struct btrfs_fs_info *info,
7199                                   struct btrfs_path *path,
7200                                   struct cache_tree *extent_cache,
7201                                   struct extent_record *rec)
7202 {
7203         struct btrfs_root *root;
7204         struct extent_backref *back;
7205         struct data_backref *dback;
7206         struct cache_extent *cache;
7207         struct btrfs_file_extent_item *fi;
7208         struct btrfs_key key;
7209         u64 bytenr, bytes;
7210         int ret;
7211
7212         list_for_each_entry(back, &rec->backrefs, list) {
7213                 /* Don't care about full backrefs (poor unloved backrefs) */
7214                 if (back->full_backref || !back->is_data)
7215                         continue;
7216
7217                 dback = to_data_backref(back);
7218
7219                 /* We found this one, we don't need to do a lookup */
7220                 if (dback->found_ref)
7221                         continue;
7222
7223                 key.objectid = dback->root;
7224                 key.type = BTRFS_ROOT_ITEM_KEY;
7225                 key.offset = (u64)-1;
7226
7227                 root = btrfs_read_fs_root(info, &key);
7228
7229                 /* No root, definitely a bad ref, skip */
7230                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7231                         continue;
7232                 /* Other err, exit */
7233                 if (IS_ERR(root))
7234                         return PTR_ERR(root);
7235
7236                 key.objectid = dback->owner;
7237                 key.type = BTRFS_EXTENT_DATA_KEY;
7238                 key.offset = dback->offset;
7239                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7240                 if (ret) {
7241                         btrfs_release_path(path);
7242                         if (ret < 0)
7243                                 return ret;
7244                         /* Didn't find it, we can carry on */
7245                         ret = 0;
7246                         continue;
7247                 }
7248
7249                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7250                                     struct btrfs_file_extent_item);
7251                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7252                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7253                 btrfs_release_path(path);
7254                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7255                 if (cache) {
7256                         struct extent_record *tmp;
7257                         tmp = container_of(cache, struct extent_record, cache);
7258
7259                         /*
7260                          * If we found an extent record for the bytenr for this
7261                          * particular backref then we can't add it to our
7262                          * current extent record.  We only want to add backrefs
7263                          * that don't have a corresponding extent item in the
7264                          * extent tree since they likely belong to this record
7265                          * and we need to fix it if it doesn't match bytenrs.
7266                          */
7267                         if  (tmp->found_rec)
7268                                 continue;
7269                 }
7270
7271                 dback->found_ref += 1;
7272                 dback->disk_bytenr = bytenr;
7273                 dback->bytes = bytes;
7274
7275                 /*
7276                  * Set this so the verify backref code knows not to trust the
7277                  * values in this backref.
7278                  */
7279                 back->broken = 1;
7280         }
7281
7282         return 0;
7283 }
7284
7285 /*
7286  * Record orphan data ref into corresponding root.
7287  *
7288  * Return 0 if the extent item contains data ref and recorded.
7289  * Return 1 if the extent item contains no useful data ref
7290  *   On that case, it may contains only shared_dataref or metadata backref
7291  *   or the file extent exists(this should be handled by the extent bytenr
7292  *   recovery routine)
7293  * Return <0 if something goes wrong.
7294  */
7295 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7296                                       struct extent_record *rec)
7297 {
7298         struct btrfs_key key;
7299         struct btrfs_root *dest_root;
7300         struct extent_backref *back;
7301         struct data_backref *dback;
7302         struct orphan_data_extent *orphan;
7303         struct btrfs_path *path;
7304         int recorded_data_ref = 0;
7305         int ret = 0;
7306
7307         if (rec->metadata)
7308                 return 1;
7309         path = btrfs_alloc_path();
7310         if (!path)
7311                 return -ENOMEM;
7312         list_for_each_entry(back, &rec->backrefs, list) {
7313                 if (back->full_backref || !back->is_data ||
7314                     !back->found_extent_tree)
7315                         continue;
7316                 dback = to_data_backref(back);
7317                 if (dback->found_ref)
7318                         continue;
7319                 key.objectid = dback->root;
7320                 key.type = BTRFS_ROOT_ITEM_KEY;
7321                 key.offset = (u64)-1;
7322
7323                 dest_root = btrfs_read_fs_root(fs_info, &key);
7324
7325                 /* For non-exist root we just skip it */
7326                 if (IS_ERR(dest_root) || !dest_root)
7327                         continue;
7328
7329                 key.objectid = dback->owner;
7330                 key.type = BTRFS_EXTENT_DATA_KEY;
7331                 key.offset = dback->offset;
7332
7333                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7334                 /*
7335                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7336                  * we need to record it for inode/file extent rebuild.
7337                  * For ret > 0, we record it only for file extent rebuild.
7338                  * For ret == 0, the file extent exists but only bytenr
7339                  * mismatch, let the original bytenr fix routine to handle,
7340                  * don't record it.
7341                  */
7342                 if (ret == 0)
7343                         continue;
7344                 ret = 0;
7345                 orphan = malloc(sizeof(*orphan));
7346                 if (!orphan) {
7347                         ret = -ENOMEM;
7348                         goto out;
7349                 }
7350                 INIT_LIST_HEAD(&orphan->list);
7351                 orphan->root = dback->root;
7352                 orphan->objectid = dback->owner;
7353                 orphan->offset = dback->offset;
7354                 orphan->disk_bytenr = rec->cache.start;
7355                 orphan->disk_len = rec->cache.size;
7356                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7357                 recorded_data_ref = 1;
7358         }
7359 out:
7360         btrfs_free_path(path);
7361         if (!ret)
7362                 return !recorded_data_ref;
7363         else
7364                 return ret;
7365 }
7366
7367 /*
7368  * when an incorrect extent item is found, this will delete
7369  * all of the existing entries for it and recreate them
7370  * based on what the tree scan found.
7371  */
7372 static int fixup_extent_refs(struct btrfs_fs_info *info,
7373                              struct cache_tree *extent_cache,
7374                              struct extent_record *rec)
7375 {
7376         struct btrfs_trans_handle *trans = NULL;
7377         int ret;
7378         struct btrfs_path *path;
7379         struct list_head *cur = rec->backrefs.next;
7380         struct cache_extent *cache;
7381         struct extent_backref *back;
7382         int allocated = 0;
7383         u64 flags = 0;
7384
7385         if (rec->flag_block_full_backref)
7386                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7387
7388         path = btrfs_alloc_path();
7389         if (!path)
7390                 return -ENOMEM;
7391
7392         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7393                 /*
7394                  * Sometimes the backrefs themselves are so broken they don't
7395                  * get attached to any meaningful rec, so first go back and
7396                  * check any of our backrefs that we couldn't find and throw
7397                  * them into the list if we find the backref so that
7398                  * verify_backrefs can figure out what to do.
7399                  */
7400                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7401                 if (ret < 0)
7402                         goto out;
7403         }
7404
7405         /* step one, make sure all of the backrefs agree */
7406         ret = verify_backrefs(info, path, rec);
7407         if (ret < 0)
7408                 goto out;
7409
7410         trans = btrfs_start_transaction(info->extent_root, 1);
7411         if (IS_ERR(trans)) {
7412                 ret = PTR_ERR(trans);
7413                 goto out;
7414         }
7415
7416         /* step two, delete all the existing records */
7417         ret = delete_extent_records(trans, info->extent_root, path,
7418                                     rec->start, rec->max_size);
7419
7420         if (ret < 0)
7421                 goto out;
7422
7423         /* was this block corrupt?  If so, don't add references to it */
7424         cache = lookup_cache_extent(info->corrupt_blocks,
7425                                     rec->start, rec->max_size);
7426         if (cache) {
7427                 ret = 0;
7428                 goto out;
7429         }
7430
7431         /* step three, recreate all the refs we did find */
7432         while(cur != &rec->backrefs) {
7433                 back = to_extent_backref(cur);
7434                 cur = cur->next;
7435
7436                 /*
7437                  * if we didn't find any references, don't create a
7438                  * new extent record
7439                  */
7440                 if (!back->found_ref)
7441                         continue;
7442
7443                 rec->bad_full_backref = 0;
7444                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7445                 allocated = 1;
7446
7447                 if (ret)
7448                         goto out;
7449         }
7450 out:
7451         if (trans) {
7452                 int err = btrfs_commit_transaction(trans, info->extent_root);
7453                 if (!ret)
7454                         ret = err;
7455         }
7456
7457         btrfs_free_path(path);
7458         return ret;
7459 }
7460
7461 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7462                               struct extent_record *rec)
7463 {
7464         struct btrfs_trans_handle *trans;
7465         struct btrfs_root *root = fs_info->extent_root;
7466         struct btrfs_path *path;
7467         struct btrfs_extent_item *ei;
7468         struct btrfs_key key;
7469         u64 flags;
7470         int ret = 0;
7471
7472         key.objectid = rec->start;
7473         if (rec->metadata) {
7474                 key.type = BTRFS_METADATA_ITEM_KEY;
7475                 key.offset = rec->info_level;
7476         } else {
7477                 key.type = BTRFS_EXTENT_ITEM_KEY;
7478                 key.offset = rec->max_size;
7479         }
7480
7481         path = btrfs_alloc_path();
7482         if (!path)
7483                 return -ENOMEM;
7484
7485         trans = btrfs_start_transaction(root, 0);
7486         if (IS_ERR(trans)) {
7487                 btrfs_free_path(path);
7488                 return PTR_ERR(trans);
7489         }
7490
7491         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7492         if (ret < 0) {
7493                 btrfs_free_path(path);
7494                 btrfs_commit_transaction(trans, root);
7495                 return ret;
7496         } else if (ret) {
7497                 fprintf(stderr, "Didn't find extent for %llu\n",
7498                         (unsigned long long)rec->start);
7499                 btrfs_free_path(path);
7500                 btrfs_commit_transaction(trans, root);
7501                 return -ENOENT;
7502         }
7503
7504         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7505                             struct btrfs_extent_item);
7506         flags = btrfs_extent_flags(path->nodes[0], ei);
7507         if (rec->flag_block_full_backref) {
7508                 fprintf(stderr, "setting full backref on %llu\n",
7509                         (unsigned long long)key.objectid);
7510                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7511         } else {
7512                 fprintf(stderr, "clearing full backref on %llu\n",
7513                         (unsigned long long)key.objectid);
7514                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7515         }
7516         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7517         btrfs_mark_buffer_dirty(path->nodes[0]);
7518         btrfs_free_path(path);
7519         return btrfs_commit_transaction(trans, root);
7520 }
7521
7522 /* right now we only prune from the extent allocation tree */
7523 static int prune_one_block(struct btrfs_trans_handle *trans,
7524                            struct btrfs_fs_info *info,
7525                            struct btrfs_corrupt_block *corrupt)
7526 {
7527         int ret;
7528         struct btrfs_path path;
7529         struct extent_buffer *eb;
7530         u64 found;
7531         int slot;
7532         int nritems;
7533         int level = corrupt->level + 1;
7534
7535         btrfs_init_path(&path);
7536 again:
7537         /* we want to stop at the parent to our busted block */
7538         path.lowest_level = level;
7539
7540         ret = btrfs_search_slot(trans, info->extent_root,
7541                                 &corrupt->key, &path, -1, 1);
7542
7543         if (ret < 0)
7544                 goto out;
7545
7546         eb = path.nodes[level];
7547         if (!eb) {
7548                 ret = -ENOENT;
7549                 goto out;
7550         }
7551
7552         /*
7553          * hopefully the search gave us the block we want to prune,
7554          * lets try that first
7555          */
7556         slot = path.slots[level];
7557         found =  btrfs_node_blockptr(eb, slot);
7558         if (found == corrupt->cache.start)
7559                 goto del_ptr;
7560
7561         nritems = btrfs_header_nritems(eb);
7562
7563         /* the search failed, lets scan this node and hope we find it */
7564         for (slot = 0; slot < nritems; slot++) {
7565                 found =  btrfs_node_blockptr(eb, slot);
7566                 if (found == corrupt->cache.start)
7567                         goto del_ptr;
7568         }
7569         /*
7570          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7571          * to this block
7572          */
7573         if (eb == info->extent_root->node) {
7574                 ret = -ENOENT;
7575                 goto out;
7576         } else {
7577                 level++;
7578                 btrfs_release_path(&path);
7579                 goto again;
7580         }
7581
7582 del_ptr:
7583         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7584         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7585
7586 out:
7587         btrfs_release_path(&path);
7588         return ret;
7589 }
7590
7591 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7592 {
7593         struct btrfs_trans_handle *trans = NULL;
7594         struct cache_extent *cache;
7595         struct btrfs_corrupt_block *corrupt;
7596
7597         while (1) {
7598                 cache = search_cache_extent(info->corrupt_blocks, 0);
7599                 if (!cache)
7600                         break;
7601                 if (!trans) {
7602                         trans = btrfs_start_transaction(info->extent_root, 1);
7603                         if (IS_ERR(trans))
7604                                 return PTR_ERR(trans);
7605                 }
7606                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7607                 prune_one_block(trans, info, corrupt);
7608                 remove_cache_extent(info->corrupt_blocks, cache);
7609         }
7610         if (trans)
7611                 return btrfs_commit_transaction(trans, info->extent_root);
7612         return 0;
7613 }
7614
7615 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7616 {
7617         struct btrfs_block_group_cache *cache;
7618         u64 start, end;
7619         int ret;
7620
7621         while (1) {
7622                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7623                                             &start, &end, EXTENT_DIRTY);
7624                 if (ret)
7625                         break;
7626                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7627                                    GFP_NOFS);
7628         }
7629
7630         start = 0;
7631         while (1) {
7632                 cache = btrfs_lookup_first_block_group(fs_info, start);
7633                 if (!cache)
7634                         break;
7635                 if (cache->cached)
7636                         cache->cached = 0;
7637                 start = cache->key.objectid + cache->key.offset;
7638         }
7639 }
7640
7641 static int check_extent_refs(struct btrfs_root *root,
7642                              struct cache_tree *extent_cache)
7643 {
7644         struct extent_record *rec;
7645         struct cache_extent *cache;
7646         int err = 0;
7647         int ret = 0;
7648         int fixed = 0;
7649         int had_dups = 0;
7650         int recorded = 0;
7651
7652         if (repair) {
7653                 /*
7654                  * if we're doing a repair, we have to make sure
7655                  * we don't allocate from the problem extents.
7656                  * In the worst case, this will be all the
7657                  * extents in the FS
7658                  */
7659                 cache = search_cache_extent(extent_cache, 0);
7660                 while(cache) {
7661                         rec = container_of(cache, struct extent_record, cache);
7662                         set_extent_dirty(root->fs_info->excluded_extents,
7663                                          rec->start,
7664                                          rec->start + rec->max_size - 1,
7665                                          GFP_NOFS);
7666                         cache = next_cache_extent(cache);
7667                 }
7668
7669                 /* pin down all the corrupted blocks too */
7670                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7671                 while(cache) {
7672                         set_extent_dirty(root->fs_info->excluded_extents,
7673                                          cache->start,
7674                                          cache->start + cache->size - 1,
7675                                          GFP_NOFS);
7676                         cache = next_cache_extent(cache);
7677                 }
7678                 prune_corrupt_blocks(root->fs_info);
7679                 reset_cached_block_groups(root->fs_info);
7680         }
7681
7682         reset_cached_block_groups(root->fs_info);
7683
7684         /*
7685          * We need to delete any duplicate entries we find first otherwise we
7686          * could mess up the extent tree when we have backrefs that actually
7687          * belong to a different extent item and not the weird duplicate one.
7688          */
7689         while (repair && !list_empty(&duplicate_extents)) {
7690                 rec = to_extent_record(duplicate_extents.next);
7691                 list_del_init(&rec->list);
7692
7693                 /* Sometimes we can find a backref before we find an actual
7694                  * extent, so we need to process it a little bit to see if there
7695                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7696                  * if this is a backref screwup.  If we need to delete stuff
7697                  * process_duplicates() will return 0, otherwise it will return
7698                  * 1 and we
7699                  */
7700                 if (process_duplicates(root, extent_cache, rec))
7701                         continue;
7702                 ret = delete_duplicate_records(root, rec);
7703                 if (ret < 0)
7704                         return ret;
7705                 /*
7706                  * delete_duplicate_records will return the number of entries
7707                  * deleted, so if it's greater than 0 then we know we actually
7708                  * did something and we need to remove.
7709                  */
7710                 if (ret)
7711                         had_dups = 1;
7712         }
7713
7714         if (had_dups)
7715                 return -EAGAIN;
7716
7717         while(1) {
7718                 int cur_err = 0;
7719
7720                 fixed = 0;
7721                 recorded = 0;
7722                 cache = search_cache_extent(extent_cache, 0);
7723                 if (!cache)
7724                         break;
7725                 rec = container_of(cache, struct extent_record, cache);
7726                 if (rec->num_duplicates) {
7727                         fprintf(stderr, "extent item %llu has multiple extent "
7728                                 "items\n", (unsigned long long)rec->start);
7729                         err = 1;
7730                         cur_err = 1;
7731                 }
7732
7733                 if (rec->refs != rec->extent_item_refs) {
7734                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7735                                 (unsigned long long)rec->start,
7736                                 (unsigned long long)rec->nr);
7737                         fprintf(stderr, "extent item %llu, found %llu\n",
7738                                 (unsigned long long)rec->extent_item_refs,
7739                                 (unsigned long long)rec->refs);
7740                         ret = record_orphan_data_extents(root->fs_info, rec);
7741                         if (ret < 0)
7742                                 goto repair_abort;
7743                         if (ret == 0) {
7744                                 recorded = 1;
7745                         } else {
7746                                 /*
7747                                  * we can't use the extent to repair file
7748                                  * extent, let the fallback method handle it.
7749                                  */
7750                                 if (!fixed && repair) {
7751                                         ret = fixup_extent_refs(
7752                                                         root->fs_info,
7753                                                         extent_cache, rec);
7754                                         if (ret)
7755                                                 goto repair_abort;
7756                                         fixed = 1;
7757                                 }
7758                         }
7759                         err = 1;
7760                         cur_err = 1;
7761                 }
7762                 if (all_backpointers_checked(rec, 1)) {
7763                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7764                                 (unsigned long long)rec->start,
7765                                 (unsigned long long)rec->nr);
7766
7767                         if (!fixed && !recorded && repair) {
7768                                 ret = fixup_extent_refs(root->fs_info,
7769                                                         extent_cache, rec);
7770                                 if (ret)
7771                                         goto repair_abort;
7772                                 fixed = 1;
7773                         }
7774                         cur_err = 1;
7775                         err = 1;
7776                 }
7777                 if (!rec->owner_ref_checked) {
7778                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7779                                 (unsigned long long)rec->start,
7780                                 (unsigned long long)rec->nr);
7781                         if (!fixed && !recorded && repair) {
7782                                 ret = fixup_extent_refs(root->fs_info,
7783                                                         extent_cache, rec);
7784                                 if (ret)
7785                                         goto repair_abort;
7786                                 fixed = 1;
7787                         }
7788                         err = 1;
7789                         cur_err = 1;
7790                 }
7791                 if (rec->bad_full_backref) {
7792                         fprintf(stderr, "bad full backref, on [%llu]\n",
7793                                 (unsigned long long)rec->start);
7794                         if (repair) {
7795                                 ret = fixup_extent_flags(root->fs_info, rec);
7796                                 if (ret)
7797                                         goto repair_abort;
7798                                 fixed = 1;
7799                         }
7800                         err = 1;
7801                         cur_err = 1;
7802                 }
7803                 /*
7804                  * Although it's not a extent ref's problem, we reuse this
7805                  * routine for error reporting.
7806                  * No repair function yet.
7807                  */
7808                 if (rec->crossing_stripes) {
7809                         fprintf(stderr,
7810                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7811                                 rec->start, rec->start + rec->max_size);
7812                         err = 1;
7813                         cur_err = 1;
7814                 }
7815
7816                 if (rec->wrong_chunk_type) {
7817                         fprintf(stderr,
7818                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7819                                 rec->start, rec->start + rec->max_size);
7820                         err = 1;
7821                         cur_err = 1;
7822                 }
7823
7824                 remove_cache_extent(extent_cache, cache);
7825                 free_all_extent_backrefs(rec);
7826                 if (!init_extent_tree && repair && (!cur_err || fixed))
7827                         clear_extent_dirty(root->fs_info->excluded_extents,
7828                                            rec->start,
7829                                            rec->start + rec->max_size - 1,
7830                                            GFP_NOFS);
7831                 free(rec);
7832         }
7833 repair_abort:
7834         if (repair) {
7835                 if (ret && ret != -EAGAIN) {
7836                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7837                         exit(1);
7838                 } else if (!ret) {
7839                         struct btrfs_trans_handle *trans;
7840
7841                         root = root->fs_info->extent_root;
7842                         trans = btrfs_start_transaction(root, 1);
7843                         if (IS_ERR(trans)) {
7844                                 ret = PTR_ERR(trans);
7845                                 goto repair_abort;
7846                         }
7847
7848                         btrfs_fix_block_accounting(trans, root);
7849                         ret = btrfs_commit_transaction(trans, root);
7850                         if (ret)
7851                                 goto repair_abort;
7852                 }
7853                 if (err)
7854                         fprintf(stderr, "repaired damaged extent references\n");
7855                 return ret;
7856         }
7857         return err;
7858 }
7859
7860 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7861 {
7862         u64 stripe_size;
7863
7864         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7865                 stripe_size = length;
7866                 stripe_size /= num_stripes;
7867         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7868                 stripe_size = length * 2;
7869                 stripe_size /= num_stripes;
7870         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7871                 stripe_size = length;
7872                 stripe_size /= (num_stripes - 1);
7873         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7874                 stripe_size = length;
7875                 stripe_size /= (num_stripes - 2);
7876         } else {
7877                 stripe_size = length;
7878         }
7879         return stripe_size;
7880 }
7881
7882 /*
7883  * Check the chunk with its block group/dev list ref:
7884  * Return 0 if all refs seems valid.
7885  * Return 1 if part of refs seems valid, need later check for rebuild ref
7886  * like missing block group and needs to search extent tree to rebuild them.
7887  * Return -1 if essential refs are missing and unable to rebuild.
7888  */
7889 static int check_chunk_refs(struct chunk_record *chunk_rec,
7890                             struct block_group_tree *block_group_cache,
7891                             struct device_extent_tree *dev_extent_cache,
7892                             int silent)
7893 {
7894         struct cache_extent *block_group_item;
7895         struct block_group_record *block_group_rec;
7896         struct cache_extent *dev_extent_item;
7897         struct device_extent_record *dev_extent_rec;
7898         u64 devid;
7899         u64 offset;
7900         u64 length;
7901         int metadump_v2 = 0;
7902         int i;
7903         int ret = 0;
7904
7905         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7906                                                chunk_rec->offset,
7907                                                chunk_rec->length);
7908         if (block_group_item) {
7909                 block_group_rec = container_of(block_group_item,
7910                                                struct block_group_record,
7911                                                cache);
7912                 if (chunk_rec->length != block_group_rec->offset ||
7913                     chunk_rec->offset != block_group_rec->objectid ||
7914                     (!metadump_v2 &&
7915                      chunk_rec->type_flags != block_group_rec->flags)) {
7916                         if (!silent)
7917                                 fprintf(stderr,
7918                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7919                                         chunk_rec->objectid,
7920                                         chunk_rec->type,
7921                                         chunk_rec->offset,
7922                                         chunk_rec->length,
7923                                         chunk_rec->offset,
7924                                         chunk_rec->type_flags,
7925                                         block_group_rec->objectid,
7926                                         block_group_rec->type,
7927                                         block_group_rec->offset,
7928                                         block_group_rec->offset,
7929                                         block_group_rec->objectid,
7930                                         block_group_rec->flags);
7931                         ret = -1;
7932                 } else {
7933                         list_del_init(&block_group_rec->list);
7934                         chunk_rec->bg_rec = block_group_rec;
7935                 }
7936         } else {
7937                 if (!silent)
7938                         fprintf(stderr,
7939                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7940                                 chunk_rec->objectid,
7941                                 chunk_rec->type,
7942                                 chunk_rec->offset,
7943                                 chunk_rec->length,
7944                                 chunk_rec->offset,
7945                                 chunk_rec->type_flags);
7946                 ret = 1;
7947         }
7948
7949         if (metadump_v2)
7950                 return ret;
7951
7952         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7953                                     chunk_rec->num_stripes);
7954         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7955                 devid = chunk_rec->stripes[i].devid;
7956                 offset = chunk_rec->stripes[i].offset;
7957                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7958                                                        devid, offset, length);
7959                 if (dev_extent_item) {
7960                         dev_extent_rec = container_of(dev_extent_item,
7961                                                 struct device_extent_record,
7962                                                 cache);
7963                         if (dev_extent_rec->objectid != devid ||
7964                             dev_extent_rec->offset != offset ||
7965                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7966                             dev_extent_rec->length != length) {
7967                                 if (!silent)
7968                                         fprintf(stderr,
7969                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7970                                                 chunk_rec->objectid,
7971                                                 chunk_rec->type,
7972                                                 chunk_rec->offset,
7973                                                 chunk_rec->stripes[i].devid,
7974                                                 chunk_rec->stripes[i].offset,
7975                                                 dev_extent_rec->objectid,
7976                                                 dev_extent_rec->offset,
7977                                                 dev_extent_rec->length);
7978                                 ret = -1;
7979                         } else {
7980                                 list_move(&dev_extent_rec->chunk_list,
7981                                           &chunk_rec->dextents);
7982                         }
7983                 } else {
7984                         if (!silent)
7985                                 fprintf(stderr,
7986                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7987                                         chunk_rec->objectid,
7988                                         chunk_rec->type,
7989                                         chunk_rec->offset,
7990                                         chunk_rec->stripes[i].devid,
7991                                         chunk_rec->stripes[i].offset);
7992                         ret = -1;
7993                 }
7994         }
7995         return ret;
7996 }
7997
7998 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7999 int check_chunks(struct cache_tree *chunk_cache,
8000                  struct block_group_tree *block_group_cache,
8001                  struct device_extent_tree *dev_extent_cache,
8002                  struct list_head *good, struct list_head *bad,
8003                  struct list_head *rebuild, int silent)
8004 {
8005         struct cache_extent *chunk_item;
8006         struct chunk_record *chunk_rec;
8007         struct block_group_record *bg_rec;
8008         struct device_extent_record *dext_rec;
8009         int err;
8010         int ret = 0;
8011
8012         chunk_item = first_cache_extent(chunk_cache);
8013         while (chunk_item) {
8014                 chunk_rec = container_of(chunk_item, struct chunk_record,
8015                                          cache);
8016                 err = check_chunk_refs(chunk_rec, block_group_cache,
8017                                        dev_extent_cache, silent);
8018                 if (err < 0)
8019                         ret = err;
8020                 if (err == 0 && good)
8021                         list_add_tail(&chunk_rec->list, good);
8022                 if (err > 0 && rebuild)
8023                         list_add_tail(&chunk_rec->list, rebuild);
8024                 if (err < 0 && bad)
8025                         list_add_tail(&chunk_rec->list, bad);
8026                 chunk_item = next_cache_extent(chunk_item);
8027         }
8028
8029         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8030                 if (!silent)
8031                         fprintf(stderr,
8032                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8033                                 bg_rec->objectid,
8034                                 bg_rec->offset,
8035                                 bg_rec->flags);
8036                 if (!ret)
8037                         ret = 1;
8038         }
8039
8040         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8041                             chunk_list) {
8042                 if (!silent)
8043                         fprintf(stderr,
8044                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8045                                 dext_rec->objectid,
8046                                 dext_rec->offset,
8047                                 dext_rec->length);
8048                 if (!ret)
8049                         ret = 1;
8050         }
8051         return ret;
8052 }
8053
8054
8055 static int check_device_used(struct device_record *dev_rec,
8056                              struct device_extent_tree *dext_cache)
8057 {
8058         struct cache_extent *cache;
8059         struct device_extent_record *dev_extent_rec;
8060         u64 total_byte = 0;
8061
8062         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8063         while (cache) {
8064                 dev_extent_rec = container_of(cache,
8065                                               struct device_extent_record,
8066                                               cache);
8067                 if (dev_extent_rec->objectid != dev_rec->devid)
8068                         break;
8069
8070                 list_del_init(&dev_extent_rec->device_list);
8071                 total_byte += dev_extent_rec->length;
8072                 cache = next_cache_extent(cache);
8073         }
8074
8075         if (total_byte != dev_rec->byte_used) {
8076                 fprintf(stderr,
8077                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8078                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8079                         dev_rec->type, dev_rec->offset);
8080                 return -1;
8081         } else {
8082                 return 0;
8083         }
8084 }
8085
8086 /* check btrfs_dev_item -> btrfs_dev_extent */
8087 static int check_devices(struct rb_root *dev_cache,
8088                          struct device_extent_tree *dev_extent_cache)
8089 {
8090         struct rb_node *dev_node;
8091         struct device_record *dev_rec;
8092         struct device_extent_record *dext_rec;
8093         int err;
8094         int ret = 0;
8095
8096         dev_node = rb_first(dev_cache);
8097         while (dev_node) {
8098                 dev_rec = container_of(dev_node, struct device_record, node);
8099                 err = check_device_used(dev_rec, dev_extent_cache);
8100                 if (err)
8101                         ret = err;
8102
8103                 dev_node = rb_next(dev_node);
8104         }
8105         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8106                             device_list) {
8107                 fprintf(stderr,
8108                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8109                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8110                 if (!ret)
8111                         ret = 1;
8112         }
8113         return ret;
8114 }
8115
8116 static int add_root_item_to_list(struct list_head *head,
8117                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8118                                   u8 level, u8 drop_level,
8119                                   int level_size, struct btrfs_key *drop_key)
8120 {
8121
8122         struct root_item_record *ri_rec;
8123         ri_rec = malloc(sizeof(*ri_rec));
8124         if (!ri_rec)
8125                 return -ENOMEM;
8126         ri_rec->bytenr = bytenr;
8127         ri_rec->objectid = objectid;
8128         ri_rec->level = level;
8129         ri_rec->level_size = level_size;
8130         ri_rec->drop_level = drop_level;
8131         ri_rec->last_snapshot = last_snapshot;
8132         if (drop_key)
8133                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8134         list_add_tail(&ri_rec->list, head);
8135
8136         return 0;
8137 }
8138
8139 static void free_root_item_list(struct list_head *list)
8140 {
8141         struct root_item_record *ri_rec;
8142
8143         while (!list_empty(list)) {
8144                 ri_rec = list_first_entry(list, struct root_item_record,
8145                                           list);
8146                 list_del_init(&ri_rec->list);
8147                 free(ri_rec);
8148         }
8149 }
8150
8151 static int deal_root_from_list(struct list_head *list,
8152                                struct btrfs_root *root,
8153                                struct block_info *bits,
8154                                int bits_nr,
8155                                struct cache_tree *pending,
8156                                struct cache_tree *seen,
8157                                struct cache_tree *reada,
8158                                struct cache_tree *nodes,
8159                                struct cache_tree *extent_cache,
8160                                struct cache_tree *chunk_cache,
8161                                struct rb_root *dev_cache,
8162                                struct block_group_tree *block_group_cache,
8163                                struct device_extent_tree *dev_extent_cache)
8164 {
8165         int ret = 0;
8166         u64 last;
8167
8168         while (!list_empty(list)) {
8169                 struct root_item_record *rec;
8170                 struct extent_buffer *buf;
8171                 rec = list_entry(list->next,
8172                                  struct root_item_record, list);
8173                 last = 0;
8174                 buf = read_tree_block(root->fs_info->tree_root,
8175                                       rec->bytenr, rec->level_size, 0);
8176                 if (!extent_buffer_uptodate(buf)) {
8177                         free_extent_buffer(buf);
8178                         ret = -EIO;
8179                         break;
8180                 }
8181                 add_root_to_pending(buf, extent_cache, pending,
8182                                     seen, nodes, rec->objectid);
8183                 /*
8184                  * To rebuild extent tree, we need deal with snapshot
8185                  * one by one, otherwise we deal with node firstly which
8186                  * can maximize readahead.
8187                  */
8188                 while (1) {
8189                         ret = run_next_block(root, bits, bits_nr, &last,
8190                                              pending, seen, reada, nodes,
8191                                              extent_cache, chunk_cache,
8192                                              dev_cache, block_group_cache,
8193                                              dev_extent_cache, rec);
8194                         if (ret != 0)
8195                                 break;
8196                 }
8197                 free_extent_buffer(buf);
8198                 list_del(&rec->list);
8199                 free(rec);
8200                 if (ret < 0)
8201                         break;
8202         }
8203         while (ret >= 0) {
8204                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8205                                      reada, nodes, extent_cache, chunk_cache,
8206                                      dev_cache, block_group_cache,
8207                                      dev_extent_cache, NULL);
8208                 if (ret != 0) {
8209                         if (ret > 0)
8210                                 ret = 0;
8211                         break;
8212                 }
8213         }
8214         return ret;
8215 }
8216
8217 static int check_chunks_and_extents(struct btrfs_root *root)
8218 {
8219         struct rb_root dev_cache;
8220         struct cache_tree chunk_cache;
8221         struct block_group_tree block_group_cache;
8222         struct device_extent_tree dev_extent_cache;
8223         struct cache_tree extent_cache;
8224         struct cache_tree seen;
8225         struct cache_tree pending;
8226         struct cache_tree reada;
8227         struct cache_tree nodes;
8228         struct extent_io_tree excluded_extents;
8229         struct cache_tree corrupt_blocks;
8230         struct btrfs_path path;
8231         struct btrfs_key key;
8232         struct btrfs_key found_key;
8233         int ret, err = 0;
8234         struct block_info *bits;
8235         int bits_nr;
8236         struct extent_buffer *leaf;
8237         int slot;
8238         struct btrfs_root_item ri;
8239         struct list_head dropping_trees;
8240         struct list_head normal_trees;
8241         struct btrfs_root *root1;
8242         u64 objectid;
8243         u32 level_size;
8244         u8 level;
8245
8246         dev_cache = RB_ROOT;
8247         cache_tree_init(&chunk_cache);
8248         block_group_tree_init(&block_group_cache);
8249         device_extent_tree_init(&dev_extent_cache);
8250
8251         cache_tree_init(&extent_cache);
8252         cache_tree_init(&seen);
8253         cache_tree_init(&pending);
8254         cache_tree_init(&nodes);
8255         cache_tree_init(&reada);
8256         cache_tree_init(&corrupt_blocks);
8257         extent_io_tree_init(&excluded_extents);
8258         INIT_LIST_HEAD(&dropping_trees);
8259         INIT_LIST_HEAD(&normal_trees);
8260
8261         if (repair) {
8262                 root->fs_info->excluded_extents = &excluded_extents;
8263                 root->fs_info->fsck_extent_cache = &extent_cache;
8264                 root->fs_info->free_extent_hook = free_extent_hook;
8265                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8266         }
8267
8268         bits_nr = 1024;
8269         bits = malloc(bits_nr * sizeof(struct block_info));
8270         if (!bits) {
8271                 perror("malloc");
8272                 exit(1);
8273         }
8274
8275         if (ctx.progress_enabled) {
8276                 ctx.tp = TASK_EXTENTS;
8277                 task_start(ctx.info);
8278         }
8279
8280 again:
8281         root1 = root->fs_info->tree_root;
8282         level = btrfs_header_level(root1->node);
8283         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8284                                     root1->node->start, 0, level, 0,
8285                                     root1->nodesize, NULL);
8286         if (ret < 0)
8287                 goto out;
8288         root1 = root->fs_info->chunk_root;
8289         level = btrfs_header_level(root1->node);
8290         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8291                                     root1->node->start, 0, level, 0,
8292                                     root1->nodesize, NULL);
8293         if (ret < 0)
8294                 goto out;
8295         btrfs_init_path(&path);
8296         key.offset = 0;
8297         key.objectid = 0;
8298         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8299         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8300                                         &key, &path, 0, 0);
8301         if (ret < 0)
8302                 goto out;
8303         while(1) {
8304                 leaf = path.nodes[0];
8305                 slot = path.slots[0];
8306                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8307                         ret = btrfs_next_leaf(root, &path);
8308                         if (ret != 0)
8309                                 break;
8310                         leaf = path.nodes[0];
8311                         slot = path.slots[0];
8312                 }
8313                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8314                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8315                         unsigned long offset;
8316                         u64 last_snapshot;
8317
8318                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8319                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8320                         last_snapshot = btrfs_root_last_snapshot(&ri);
8321                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8322                                 level = btrfs_root_level(&ri);
8323                                 level_size = root->nodesize;
8324                                 ret = add_root_item_to_list(&normal_trees,
8325                                                 found_key.objectid,
8326                                                 btrfs_root_bytenr(&ri),
8327                                                 last_snapshot, level,
8328                                                 0, level_size, NULL);
8329                                 if (ret < 0)
8330                                         goto out;
8331                         } else {
8332                                 level = btrfs_root_level(&ri);
8333                                 level_size = root->nodesize;
8334                                 objectid = found_key.objectid;
8335                                 btrfs_disk_key_to_cpu(&found_key,
8336                                                       &ri.drop_progress);
8337                                 ret = add_root_item_to_list(&dropping_trees,
8338                                                 objectid,
8339                                                 btrfs_root_bytenr(&ri),
8340                                                 last_snapshot, level,
8341                                                 ri.drop_level,
8342                                                 level_size, &found_key);
8343                                 if (ret < 0)
8344                                         goto out;
8345                         }
8346                 }
8347                 path.slots[0]++;
8348         }
8349         btrfs_release_path(&path);
8350
8351         /*
8352          * check_block can return -EAGAIN if it fixes something, please keep
8353          * this in mind when dealing with return values from these functions, if
8354          * we get -EAGAIN we want to fall through and restart the loop.
8355          */
8356         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8357                                   &seen, &reada, &nodes, &extent_cache,
8358                                   &chunk_cache, &dev_cache, &block_group_cache,
8359                                   &dev_extent_cache);
8360         if (ret < 0) {
8361                 if (ret == -EAGAIN)
8362                         goto loop;
8363                 goto out;
8364         }
8365         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8366                                   &pending, &seen, &reada, &nodes,
8367                                   &extent_cache, &chunk_cache, &dev_cache,
8368                                   &block_group_cache, &dev_extent_cache);
8369         if (ret < 0) {
8370                 if (ret == -EAGAIN)
8371                         goto loop;
8372                 goto out;
8373         }
8374
8375         ret = check_chunks(&chunk_cache, &block_group_cache,
8376                            &dev_extent_cache, NULL, NULL, NULL, 0);
8377         if (ret) {
8378                 if (ret == -EAGAIN)
8379                         goto loop;
8380                 err = ret;
8381         }
8382
8383         ret = check_extent_refs(root, &extent_cache);
8384         if (ret < 0) {
8385                 if (ret == -EAGAIN)
8386                         goto loop;
8387                 goto out;
8388         }
8389
8390         ret = check_devices(&dev_cache, &dev_extent_cache);
8391         if (ret && err)
8392                 ret = err;
8393
8394 out:
8395         task_stop(ctx.info);
8396         if (repair) {
8397                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8398                 extent_io_tree_cleanup(&excluded_extents);
8399                 root->fs_info->fsck_extent_cache = NULL;
8400                 root->fs_info->free_extent_hook = NULL;
8401                 root->fs_info->corrupt_blocks = NULL;
8402                 root->fs_info->excluded_extents = NULL;
8403         }
8404         free(bits);
8405         free_chunk_cache_tree(&chunk_cache);
8406         free_device_cache_tree(&dev_cache);
8407         free_block_group_tree(&block_group_cache);
8408         free_device_extent_tree(&dev_extent_cache);
8409         free_extent_cache_tree(&seen);
8410         free_extent_cache_tree(&pending);
8411         free_extent_cache_tree(&reada);
8412         free_extent_cache_tree(&nodes);
8413         return ret;
8414 loop:
8415         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8416         free_extent_cache_tree(&seen);
8417         free_extent_cache_tree(&pending);
8418         free_extent_cache_tree(&reada);
8419         free_extent_cache_tree(&nodes);
8420         free_chunk_cache_tree(&chunk_cache);
8421         free_block_group_tree(&block_group_cache);
8422         free_device_cache_tree(&dev_cache);
8423         free_device_extent_tree(&dev_extent_cache);
8424         free_extent_record_cache(root->fs_info, &extent_cache);
8425         free_root_item_list(&normal_trees);
8426         free_root_item_list(&dropping_trees);
8427         extent_io_tree_cleanup(&excluded_extents);
8428         goto again;
8429 }
8430
8431 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8432                            struct btrfs_root *root, int overwrite)
8433 {
8434         struct extent_buffer *c;
8435         struct extent_buffer *old = root->node;
8436         int level;
8437         int ret;
8438         struct btrfs_disk_key disk_key = {0,0,0};
8439
8440         level = 0;
8441
8442         if (overwrite) {
8443                 c = old;
8444                 extent_buffer_get(c);
8445                 goto init;
8446         }
8447         c = btrfs_alloc_free_block(trans, root,
8448                                    root->nodesize,
8449                                    root->root_key.objectid,
8450                                    &disk_key, level, 0, 0);
8451         if (IS_ERR(c)) {
8452                 c = old;
8453                 extent_buffer_get(c);
8454                 overwrite = 1;
8455         }
8456 init:
8457         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8458         btrfs_set_header_level(c, level);
8459         btrfs_set_header_bytenr(c, c->start);
8460         btrfs_set_header_generation(c, trans->transid);
8461         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8462         btrfs_set_header_owner(c, root->root_key.objectid);
8463
8464         write_extent_buffer(c, root->fs_info->fsid,
8465                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8466
8467         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8468                             btrfs_header_chunk_tree_uuid(c),
8469                             BTRFS_UUID_SIZE);
8470
8471         btrfs_mark_buffer_dirty(c);
8472         /*
8473          * this case can happen in the following case:
8474          *
8475          * 1.overwrite previous root.
8476          *
8477          * 2.reinit reloc data root, this is because we skip pin
8478          * down reloc data tree before which means we can allocate
8479          * same block bytenr here.
8480          */
8481         if (old->start == c->start) {
8482                 btrfs_set_root_generation(&root->root_item,
8483                                           trans->transid);
8484                 root->root_item.level = btrfs_header_level(root->node);
8485                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8486                                         &root->root_key, &root->root_item);
8487                 if (ret) {
8488                         free_extent_buffer(c);
8489                         return ret;
8490                 }
8491         }
8492         free_extent_buffer(old);
8493         root->node = c;
8494         add_root_to_dirty_list(root);
8495         return 0;
8496 }
8497
8498 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8499                                 struct extent_buffer *eb, int tree_root)
8500 {
8501         struct extent_buffer *tmp;
8502         struct btrfs_root_item *ri;
8503         struct btrfs_key key;
8504         u64 bytenr;
8505         u32 nodesize;
8506         int level = btrfs_header_level(eb);
8507         int nritems;
8508         int ret;
8509         int i;
8510
8511         /*
8512          * If we have pinned this block before, don't pin it again.
8513          * This can not only avoid forever loop with broken filesystem
8514          * but also give us some speedups.
8515          */
8516         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8517                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8518                 return 0;
8519
8520         btrfs_pin_extent(fs_info, eb->start, eb->len);
8521
8522         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8523         nritems = btrfs_header_nritems(eb);
8524         for (i = 0; i < nritems; i++) {
8525                 if (level == 0) {
8526                         btrfs_item_key_to_cpu(eb, &key, i);
8527                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8528                                 continue;
8529                         /* Skip the extent root and reloc roots */
8530                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8531                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8532                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8533                                 continue;
8534                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8535                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8536
8537                         /*
8538                          * If at any point we start needing the real root we
8539                          * will have to build a stump root for the root we are
8540                          * in, but for now this doesn't actually use the root so
8541                          * just pass in extent_root.
8542                          */
8543                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8544                                               nodesize, 0);
8545                         if (!extent_buffer_uptodate(tmp)) {
8546                                 fprintf(stderr, "Error reading root block\n");
8547                                 return -EIO;
8548                         }
8549                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8550                         free_extent_buffer(tmp);
8551                         if (ret)
8552                                 return ret;
8553                 } else {
8554                         bytenr = btrfs_node_blockptr(eb, i);
8555
8556                         /* If we aren't the tree root don't read the block */
8557                         if (level == 1 && !tree_root) {
8558                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8559                                 continue;
8560                         }
8561
8562                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8563                                               nodesize, 0);
8564                         if (!extent_buffer_uptodate(tmp)) {
8565                                 fprintf(stderr, "Error reading tree block\n");
8566                                 return -EIO;
8567                         }
8568                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8569                         free_extent_buffer(tmp);
8570                         if (ret)
8571                                 return ret;
8572                 }
8573         }
8574
8575         return 0;
8576 }
8577
8578 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8579 {
8580         int ret;
8581
8582         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8583         if (ret)
8584                 return ret;
8585
8586         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8587 }
8588
8589 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8590 {
8591         struct btrfs_block_group_cache *cache;
8592         struct btrfs_path *path;
8593         struct extent_buffer *leaf;
8594         struct btrfs_chunk *chunk;
8595         struct btrfs_key key;
8596         int ret;
8597         u64 start;
8598
8599         path = btrfs_alloc_path();
8600         if (!path)
8601                 return -ENOMEM;
8602
8603         key.objectid = 0;
8604         key.type = BTRFS_CHUNK_ITEM_KEY;
8605         key.offset = 0;
8606
8607         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8608         if (ret < 0) {
8609                 btrfs_free_path(path);
8610                 return ret;
8611         }
8612
8613         /*
8614          * We do this in case the block groups were screwed up and had alloc
8615          * bits that aren't actually set on the chunks.  This happens with
8616          * restored images every time and could happen in real life I guess.
8617          */
8618         fs_info->avail_data_alloc_bits = 0;
8619         fs_info->avail_metadata_alloc_bits = 0;
8620         fs_info->avail_system_alloc_bits = 0;
8621
8622         /* First we need to create the in-memory block groups */
8623         while (1) {
8624                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8625                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8626                         if (ret < 0) {
8627                                 btrfs_free_path(path);
8628                                 return ret;
8629                         }
8630                         if (ret) {
8631                                 ret = 0;
8632                                 break;
8633                         }
8634                 }
8635                 leaf = path->nodes[0];
8636                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8637                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8638                         path->slots[0]++;
8639                         continue;
8640                 }
8641
8642                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8643                                        struct btrfs_chunk);
8644                 btrfs_add_block_group(fs_info, 0,
8645                                       btrfs_chunk_type(leaf, chunk),
8646                                       key.objectid, key.offset,
8647                                       btrfs_chunk_length(leaf, chunk));
8648                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8649                                  key.offset + btrfs_chunk_length(leaf, chunk),
8650                                  GFP_NOFS);
8651                 path->slots[0]++;
8652         }
8653         start = 0;
8654         while (1) {
8655                 cache = btrfs_lookup_first_block_group(fs_info, start);
8656                 if (!cache)
8657                         break;
8658                 cache->cached = 1;
8659                 start = cache->key.objectid + cache->key.offset;
8660         }
8661
8662         btrfs_free_path(path);
8663         return 0;
8664 }
8665
8666 static int reset_balance(struct btrfs_trans_handle *trans,
8667                          struct btrfs_fs_info *fs_info)
8668 {
8669         struct btrfs_root *root = fs_info->tree_root;
8670         struct btrfs_path *path;
8671         struct extent_buffer *leaf;
8672         struct btrfs_key key;
8673         int del_slot, del_nr = 0;
8674         int ret;
8675         int found = 0;
8676
8677         path = btrfs_alloc_path();
8678         if (!path)
8679                 return -ENOMEM;
8680
8681         key.objectid = BTRFS_BALANCE_OBJECTID;
8682         key.type = BTRFS_BALANCE_ITEM_KEY;
8683         key.offset = 0;
8684
8685         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8686         if (ret) {
8687                 if (ret > 0)
8688                         ret = 0;
8689                 if (!ret)
8690                         goto reinit_data_reloc;
8691                 else
8692                         goto out;
8693         }
8694
8695         ret = btrfs_del_item(trans, root, path);
8696         if (ret)
8697                 goto out;
8698         btrfs_release_path(path);
8699
8700         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8701         key.type = BTRFS_ROOT_ITEM_KEY;
8702         key.offset = 0;
8703
8704         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8705         if (ret < 0)
8706                 goto out;
8707         while (1) {
8708                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8709                         if (!found)
8710                                 break;
8711
8712                         if (del_nr) {
8713                                 ret = btrfs_del_items(trans, root, path,
8714                                                       del_slot, del_nr);
8715                                 del_nr = 0;
8716                                 if (ret)
8717                                         goto out;
8718                         }
8719                         key.offset++;
8720                         btrfs_release_path(path);
8721
8722                         found = 0;
8723                         ret = btrfs_search_slot(trans, root, &key, path,
8724                                                 -1, 1);
8725                         if (ret < 0)
8726                                 goto out;
8727                         continue;
8728                 }
8729                 found = 1;
8730                 leaf = path->nodes[0];
8731                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8732                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8733                         break;
8734                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8735                         path->slots[0]++;
8736                         continue;
8737                 }
8738                 if (!del_nr) {
8739                         del_slot = path->slots[0];
8740                         del_nr = 1;
8741                 } else {
8742                         del_nr++;
8743                 }
8744                 path->slots[0]++;
8745         }
8746
8747         if (del_nr) {
8748                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8749                 if (ret)
8750                         goto out;
8751         }
8752         btrfs_release_path(path);
8753
8754 reinit_data_reloc:
8755         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8756         key.type = BTRFS_ROOT_ITEM_KEY;
8757         key.offset = (u64)-1;
8758         root = btrfs_read_fs_root(fs_info, &key);
8759         if (IS_ERR(root)) {
8760                 fprintf(stderr, "Error reading data reloc tree\n");
8761                 ret = PTR_ERR(root);
8762                 goto out;
8763         }
8764         record_root_in_trans(trans, root);
8765         ret = btrfs_fsck_reinit_root(trans, root, 0);
8766         if (ret)
8767                 goto out;
8768         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8769 out:
8770         btrfs_free_path(path);
8771         return ret;
8772 }
8773
8774 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8775                               struct btrfs_fs_info *fs_info)
8776 {
8777         u64 start = 0;
8778         int ret;
8779
8780         /*
8781          * The only reason we don't do this is because right now we're just
8782          * walking the trees we find and pinning down their bytes, we don't look
8783          * at any of the leaves.  In order to do mixed groups we'd have to check
8784          * the leaves of any fs roots and pin down the bytes for any file
8785          * extents we find.  Not hard but why do it if we don't have to?
8786          */
8787         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8788                 fprintf(stderr, "We don't support re-initing the extent tree "
8789                         "for mixed block groups yet, please notify a btrfs "
8790                         "developer you want to do this so they can add this "
8791                         "functionality.\n");
8792                 return -EINVAL;
8793         }
8794
8795         /*
8796          * first we need to walk all of the trees except the extent tree and pin
8797          * down the bytes that are in use so we don't overwrite any existing
8798          * metadata.
8799          */
8800         ret = pin_metadata_blocks(fs_info);
8801         if (ret) {
8802                 fprintf(stderr, "error pinning down used bytes\n");
8803                 return ret;
8804         }
8805
8806         /*
8807          * Need to drop all the block groups since we're going to recreate all
8808          * of them again.
8809          */
8810         btrfs_free_block_groups(fs_info);
8811         ret = reset_block_groups(fs_info);
8812         if (ret) {
8813                 fprintf(stderr, "error resetting the block groups\n");
8814                 return ret;
8815         }
8816
8817         /* Ok we can allocate now, reinit the extent root */
8818         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8819         if (ret) {
8820                 fprintf(stderr, "extent root initialization failed\n");
8821                 /*
8822                  * When the transaction code is updated we should end the
8823                  * transaction, but for now progs only knows about commit so
8824                  * just return an error.
8825                  */
8826                 return ret;
8827         }
8828
8829         /*
8830          * Now we have all the in-memory block groups setup so we can make
8831          * allocations properly, and the metadata we care about is safe since we
8832          * pinned all of it above.
8833          */
8834         while (1) {
8835                 struct btrfs_block_group_cache *cache;
8836
8837                 cache = btrfs_lookup_first_block_group(fs_info, start);
8838                 if (!cache)
8839                         break;
8840                 start = cache->key.objectid + cache->key.offset;
8841                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8842                                         &cache->key, &cache->item,
8843                                         sizeof(cache->item));
8844                 if (ret) {
8845                         fprintf(stderr, "Error adding block group\n");
8846                         return ret;
8847                 }
8848                 btrfs_extent_post_op(trans, fs_info->extent_root);
8849         }
8850
8851         ret = reset_balance(trans, fs_info);
8852         if (ret)
8853                 fprintf(stderr, "error resetting the pending balance\n");
8854
8855         return ret;
8856 }
8857
8858 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8859 {
8860         struct btrfs_path *path;
8861         struct btrfs_trans_handle *trans;
8862         struct btrfs_key key;
8863         int ret;
8864
8865         printf("Recowing metadata block %llu\n", eb->start);
8866         key.objectid = btrfs_header_owner(eb);
8867         key.type = BTRFS_ROOT_ITEM_KEY;
8868         key.offset = (u64)-1;
8869
8870         root = btrfs_read_fs_root(root->fs_info, &key);
8871         if (IS_ERR(root)) {
8872                 fprintf(stderr, "Couldn't find owner root %llu\n",
8873                         key.objectid);
8874                 return PTR_ERR(root);
8875         }
8876
8877         path = btrfs_alloc_path();
8878         if (!path)
8879                 return -ENOMEM;
8880
8881         trans = btrfs_start_transaction(root, 1);
8882         if (IS_ERR(trans)) {
8883                 btrfs_free_path(path);
8884                 return PTR_ERR(trans);
8885         }
8886
8887         path->lowest_level = btrfs_header_level(eb);
8888         if (path->lowest_level)
8889                 btrfs_node_key_to_cpu(eb, &key, 0);
8890         else
8891                 btrfs_item_key_to_cpu(eb, &key, 0);
8892
8893         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8894         btrfs_commit_transaction(trans, root);
8895         btrfs_free_path(path);
8896         return ret;
8897 }
8898
8899 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8900 {
8901         struct btrfs_path *path;
8902         struct btrfs_trans_handle *trans;
8903         struct btrfs_key key;
8904         int ret;
8905
8906         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8907                bad->key.type, bad->key.offset);
8908         key.objectid = bad->root_id;
8909         key.type = BTRFS_ROOT_ITEM_KEY;
8910         key.offset = (u64)-1;
8911
8912         root = btrfs_read_fs_root(root->fs_info, &key);
8913         if (IS_ERR(root)) {
8914                 fprintf(stderr, "Couldn't find owner root %llu\n",
8915                         key.objectid);
8916                 return PTR_ERR(root);
8917         }
8918
8919         path = btrfs_alloc_path();
8920         if (!path)
8921                 return -ENOMEM;
8922
8923         trans = btrfs_start_transaction(root, 1);
8924         if (IS_ERR(trans)) {
8925                 btrfs_free_path(path);
8926                 return PTR_ERR(trans);
8927         }
8928
8929         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8930         if (ret) {
8931                 if (ret > 0)
8932                         ret = 0;
8933                 goto out;
8934         }
8935         ret = btrfs_del_item(trans, root, path);
8936 out:
8937         btrfs_commit_transaction(trans, root);
8938         btrfs_free_path(path);
8939         return ret;
8940 }
8941
8942 static int zero_log_tree(struct btrfs_root *root)
8943 {
8944         struct btrfs_trans_handle *trans;
8945         int ret;
8946
8947         trans = btrfs_start_transaction(root, 1);
8948         if (IS_ERR(trans)) {
8949                 ret = PTR_ERR(trans);
8950                 return ret;
8951         }
8952         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8953         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8954         ret = btrfs_commit_transaction(trans, root);
8955         return ret;
8956 }
8957
8958 static int populate_csum(struct btrfs_trans_handle *trans,
8959                          struct btrfs_root *csum_root, char *buf, u64 start,
8960                          u64 len)
8961 {
8962         u64 offset = 0;
8963         u64 sectorsize;
8964         int ret = 0;
8965
8966         while (offset < len) {
8967                 sectorsize = csum_root->sectorsize;
8968                 ret = read_extent_data(csum_root, buf, start + offset,
8969                                        &sectorsize, 0);
8970                 if (ret)
8971                         break;
8972                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8973                                             start + offset, buf, sectorsize);
8974                 if (ret)
8975                         break;
8976                 offset += sectorsize;
8977         }
8978         return ret;
8979 }
8980
8981 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8982                                       struct btrfs_root *csum_root,
8983                                       struct btrfs_root *cur_root)
8984 {
8985         struct btrfs_path *path;
8986         struct btrfs_key key;
8987         struct extent_buffer *node;
8988         struct btrfs_file_extent_item *fi;
8989         char *buf = NULL;
8990         u64 start = 0;
8991         u64 len = 0;
8992         int slot = 0;
8993         int ret = 0;
8994
8995         path = btrfs_alloc_path();
8996         if (!path)
8997                 return -ENOMEM;
8998         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8999         if (!buf) {
9000                 ret = -ENOMEM;
9001                 goto out;
9002         }
9003
9004         key.objectid = 0;
9005         key.offset = 0;
9006         key.type = 0;
9007
9008         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9009         if (ret < 0)
9010                 goto out;
9011         /* Iterate all regular file extents and fill its csum */
9012         while (1) {
9013                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9014
9015                 if (key.type != BTRFS_EXTENT_DATA_KEY)
9016                         goto next;
9017                 node = path->nodes[0];
9018                 slot = path->slots[0];
9019                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9020                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9021                         goto next;
9022                 start = btrfs_file_extent_disk_bytenr(node, fi);
9023                 len = btrfs_file_extent_disk_num_bytes(node, fi);
9024
9025                 ret = populate_csum(trans, csum_root, buf, start, len);
9026                 if (ret == -EEXIST)
9027                         ret = 0;
9028                 if (ret < 0)
9029                         goto out;
9030 next:
9031                 /*
9032                  * TODO: if next leaf is corrupted, jump to nearest next valid
9033                  * leaf.
9034                  */
9035                 ret = btrfs_next_item(cur_root, path);
9036                 if (ret < 0)
9037                         goto out;
9038                 if (ret > 0) {
9039                         ret = 0;
9040                         goto out;
9041                 }
9042         }
9043
9044 out:
9045         btrfs_free_path(path);
9046         free(buf);
9047         return ret;
9048 }
9049
9050 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9051                                   struct btrfs_root *csum_root)
9052 {
9053         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9054         struct btrfs_path *path;
9055         struct btrfs_root *tree_root = fs_info->tree_root;
9056         struct btrfs_root *cur_root;
9057         struct extent_buffer *node;
9058         struct btrfs_key key;
9059         int slot = 0;
9060         int ret = 0;
9061
9062         path = btrfs_alloc_path();
9063         if (!path)
9064                 return -ENOMEM;
9065
9066         key.objectid = BTRFS_FS_TREE_OBJECTID;
9067         key.offset = 0;
9068         key.type = BTRFS_ROOT_ITEM_KEY;
9069
9070         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9071         if (ret < 0)
9072                 goto out;
9073         if (ret > 0) {
9074                 ret = -ENOENT;
9075                 goto out;
9076         }
9077
9078         while (1) {
9079                 node = path->nodes[0];
9080                 slot = path->slots[0];
9081                 btrfs_item_key_to_cpu(node, &key, slot);
9082                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9083                         goto out;
9084                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9085                         goto next;
9086                 if (!is_fstree(key.objectid))
9087                         goto next;
9088                 key.offset = (u64)-1;
9089
9090                 cur_root = btrfs_read_fs_root(fs_info, &key);
9091                 if (IS_ERR(cur_root) || !cur_root) {
9092                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9093                                 key.objectid);
9094                         goto out;
9095                 }
9096                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9097                                 cur_root);
9098                 if (ret < 0)
9099                         goto out;
9100 next:
9101                 ret = btrfs_next_item(tree_root, path);
9102                 if (ret > 0) {
9103                         ret = 0;
9104                         goto out;
9105                 }
9106                 if (ret < 0)
9107                         goto out;
9108         }
9109
9110 out:
9111         btrfs_free_path(path);
9112         return ret;
9113 }
9114
9115 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9116                                       struct btrfs_root *csum_root)
9117 {
9118         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9119         struct btrfs_path *path;
9120         struct btrfs_extent_item *ei;
9121         struct extent_buffer *leaf;
9122         char *buf;
9123         struct btrfs_key key;
9124         int ret;
9125
9126         path = btrfs_alloc_path();
9127         if (!path)
9128                 return -ENOMEM;
9129
9130         key.objectid = 0;
9131         key.type = BTRFS_EXTENT_ITEM_KEY;
9132         key.offset = 0;
9133
9134         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9135         if (ret < 0) {
9136                 btrfs_free_path(path);
9137                 return ret;
9138         }
9139
9140         buf = malloc(csum_root->sectorsize);
9141         if (!buf) {
9142                 btrfs_free_path(path);
9143                 return -ENOMEM;
9144         }
9145
9146         while (1) {
9147                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9148                         ret = btrfs_next_leaf(extent_root, path);
9149                         if (ret < 0)
9150                                 break;
9151                         if (ret) {
9152                                 ret = 0;
9153                                 break;
9154                         }
9155                 }
9156                 leaf = path->nodes[0];
9157
9158                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9159                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9160                         path->slots[0]++;
9161                         continue;
9162                 }
9163
9164                 ei = btrfs_item_ptr(leaf, path->slots[0],
9165                                     struct btrfs_extent_item);
9166                 if (!(btrfs_extent_flags(leaf, ei) &
9167                       BTRFS_EXTENT_FLAG_DATA)) {
9168                         path->slots[0]++;
9169                         continue;
9170                 }
9171
9172                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9173                                     key.offset);
9174                 if (ret)
9175                         break;
9176                 path->slots[0]++;
9177         }
9178
9179         btrfs_free_path(path);
9180         free(buf);
9181         return ret;
9182 }
9183
9184 /*
9185  * Recalculate the csum and put it into the csum tree.
9186  *
9187  * Extent tree init will wipe out all the extent info, so in that case, we
9188  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9189  * will use fs/subvol trees to init the csum tree.
9190  */
9191 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9192                           struct btrfs_root *csum_root,
9193                           int search_fs_tree)
9194 {
9195         if (search_fs_tree)
9196                 return fill_csum_tree_from_fs(trans, csum_root);
9197         else
9198                 return fill_csum_tree_from_extent(trans, csum_root);
9199 }
9200
9201 static void free_roots_info_cache(void)
9202 {
9203         if (!roots_info_cache)
9204                 return;
9205
9206         while (!cache_tree_empty(roots_info_cache)) {
9207                 struct cache_extent *entry;
9208                 struct root_item_info *rii;
9209
9210                 entry = first_cache_extent(roots_info_cache);
9211                 if (!entry)
9212                         break;
9213                 remove_cache_extent(roots_info_cache, entry);
9214                 rii = container_of(entry, struct root_item_info, cache_extent);
9215                 free(rii);
9216         }
9217
9218         free(roots_info_cache);
9219         roots_info_cache = NULL;
9220 }
9221
9222 static int build_roots_info_cache(struct btrfs_fs_info *info)
9223 {
9224         int ret = 0;
9225         struct btrfs_key key;
9226         struct extent_buffer *leaf;
9227         struct btrfs_path *path;
9228
9229         if (!roots_info_cache) {
9230                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9231                 if (!roots_info_cache)
9232                         return -ENOMEM;
9233                 cache_tree_init(roots_info_cache);
9234         }
9235
9236         path = btrfs_alloc_path();
9237         if (!path)
9238                 return -ENOMEM;
9239
9240         key.objectid = 0;
9241         key.type = BTRFS_EXTENT_ITEM_KEY;
9242         key.offset = 0;
9243
9244         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9245         if (ret < 0)
9246                 goto out;
9247         leaf = path->nodes[0];
9248
9249         while (1) {
9250                 struct btrfs_key found_key;
9251                 struct btrfs_extent_item *ei;
9252                 struct btrfs_extent_inline_ref *iref;
9253                 int slot = path->slots[0];
9254                 int type;
9255                 u64 flags;
9256                 u64 root_id;
9257                 u8 level;
9258                 struct cache_extent *entry;
9259                 struct root_item_info *rii;
9260
9261                 if (slot >= btrfs_header_nritems(leaf)) {
9262                         ret = btrfs_next_leaf(info->extent_root, path);
9263                         if (ret < 0) {
9264                                 break;
9265                         } else if (ret) {
9266                                 ret = 0;
9267                                 break;
9268                         }
9269                         leaf = path->nodes[0];
9270                         slot = path->slots[0];
9271                 }
9272
9273                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9274
9275                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9276                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9277                         goto next;
9278
9279                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9280                 flags = btrfs_extent_flags(leaf, ei);
9281
9282                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9283                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9284                         goto next;
9285
9286                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9287                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9288                         level = found_key.offset;
9289                 } else {
9290                         struct btrfs_tree_block_info *binfo;
9291
9292                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9293                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9294                         level = btrfs_tree_block_level(leaf, binfo);
9295                 }
9296
9297                 /*
9298                  * For a root extent, it must be of the following type and the
9299                  * first (and only one) iref in the item.
9300                  */
9301                 type = btrfs_extent_inline_ref_type(leaf, iref);
9302                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9303                         goto next;
9304
9305                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9306                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9307                 if (!entry) {
9308                         rii = malloc(sizeof(struct root_item_info));
9309                         if (!rii) {
9310                                 ret = -ENOMEM;
9311                                 goto out;
9312                         }
9313                         rii->cache_extent.start = root_id;
9314                         rii->cache_extent.size = 1;
9315                         rii->level = (u8)-1;
9316                         entry = &rii->cache_extent;
9317                         ret = insert_cache_extent(roots_info_cache, entry);
9318                         ASSERT(ret == 0);
9319                 } else {
9320                         rii = container_of(entry, struct root_item_info,
9321                                            cache_extent);
9322                 }
9323
9324                 ASSERT(rii->cache_extent.start == root_id);
9325                 ASSERT(rii->cache_extent.size == 1);
9326
9327                 if (level > rii->level || rii->level == (u8)-1) {
9328                         rii->level = level;
9329                         rii->bytenr = found_key.objectid;
9330                         rii->gen = btrfs_extent_generation(leaf, ei);
9331                         rii->node_count = 1;
9332                 } else if (level == rii->level) {
9333                         rii->node_count++;
9334                 }
9335 next:
9336                 path->slots[0]++;
9337         }
9338
9339 out:
9340         btrfs_free_path(path);
9341
9342         return ret;
9343 }
9344
9345 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9346                                   struct btrfs_path *path,
9347                                   const struct btrfs_key *root_key,
9348                                   const int read_only_mode)
9349 {
9350         const u64 root_id = root_key->objectid;
9351         struct cache_extent *entry;
9352         struct root_item_info *rii;
9353         struct btrfs_root_item ri;
9354         unsigned long offset;
9355
9356         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9357         if (!entry) {
9358                 fprintf(stderr,
9359                         "Error: could not find extent items for root %llu\n",
9360                         root_key->objectid);
9361                 return -ENOENT;
9362         }
9363
9364         rii = container_of(entry, struct root_item_info, cache_extent);
9365         ASSERT(rii->cache_extent.start == root_id);
9366         ASSERT(rii->cache_extent.size == 1);
9367
9368         if (rii->node_count != 1) {
9369                 fprintf(stderr,
9370                         "Error: could not find btree root extent for root %llu\n",
9371                         root_id);
9372                 return -ENOENT;
9373         }
9374
9375         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9376         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9377
9378         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9379             btrfs_root_level(&ri) != rii->level ||
9380             btrfs_root_generation(&ri) != rii->gen) {
9381
9382                 /*
9383                  * If we're in repair mode but our caller told us to not update
9384                  * the root item, i.e. just check if it needs to be updated, don't
9385                  * print this message, since the caller will call us again shortly
9386                  * for the same root item without read only mode (the caller will
9387                  * open a transaction first).
9388                  */
9389                 if (!(read_only_mode && repair))
9390                         fprintf(stderr,
9391                                 "%sroot item for root %llu,"
9392                                 " current bytenr %llu, current gen %llu, current level %u,"
9393                                 " new bytenr %llu, new gen %llu, new level %u\n",
9394                                 (read_only_mode ? "" : "fixing "),
9395                                 root_id,
9396                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9397                                 btrfs_root_level(&ri),
9398                                 rii->bytenr, rii->gen, rii->level);
9399
9400                 if (btrfs_root_generation(&ri) > rii->gen) {
9401                         fprintf(stderr,
9402                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9403                                 root_id, btrfs_root_generation(&ri), rii->gen);
9404                         return -EINVAL;
9405                 }
9406
9407                 if (!read_only_mode) {
9408                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9409                         btrfs_set_root_level(&ri, rii->level);
9410                         btrfs_set_root_generation(&ri, rii->gen);
9411                         write_extent_buffer(path->nodes[0], &ri,
9412                                             offset, sizeof(ri));
9413                 }
9414
9415                 return 1;
9416         }
9417
9418         return 0;
9419 }
9420
9421 /*
9422  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9423  * caused read-only snapshots to be corrupted if they were created at a moment
9424  * when the source subvolume/snapshot had orphan items. The issue was that the
9425  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9426  * node instead of the post orphan cleanup root node.
9427  * So this function, and its callees, just detects and fixes those cases. Even
9428  * though the regression was for read-only snapshots, this function applies to
9429  * any snapshot/subvolume root.
9430  * This must be run before any other repair code - not doing it so, makes other
9431  * repair code delete or modify backrefs in the extent tree for example, which
9432  * will result in an inconsistent fs after repairing the root items.
9433  */
9434 static int repair_root_items(struct btrfs_fs_info *info)
9435 {
9436         struct btrfs_path *path = NULL;
9437         struct btrfs_key key;
9438         struct extent_buffer *leaf;
9439         struct btrfs_trans_handle *trans = NULL;
9440         int ret = 0;
9441         int bad_roots = 0;
9442         int need_trans = 0;
9443
9444         ret = build_roots_info_cache(info);
9445         if (ret)
9446                 goto out;
9447
9448         path = btrfs_alloc_path();
9449         if (!path) {
9450                 ret = -ENOMEM;
9451                 goto out;
9452         }
9453
9454         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9455         key.type = BTRFS_ROOT_ITEM_KEY;
9456         key.offset = 0;
9457
9458 again:
9459         /*
9460          * Avoid opening and committing transactions if a leaf doesn't have
9461          * any root items that need to be fixed, so that we avoid rotating
9462          * backup roots unnecessarily.
9463          */
9464         if (need_trans) {
9465                 trans = btrfs_start_transaction(info->tree_root, 1);
9466                 if (IS_ERR(trans)) {
9467                         ret = PTR_ERR(trans);
9468                         goto out;
9469                 }
9470         }
9471
9472         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9473                                 0, trans ? 1 : 0);
9474         if (ret < 0)
9475                 goto out;
9476         leaf = path->nodes[0];
9477
9478         while (1) {
9479                 struct btrfs_key found_key;
9480
9481                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9482                         int no_more_keys = find_next_key(path, &key);
9483
9484                         btrfs_release_path(path);
9485                         if (trans) {
9486                                 ret = btrfs_commit_transaction(trans,
9487                                                                info->tree_root);
9488                                 trans = NULL;
9489                                 if (ret < 0)
9490                                         goto out;
9491                         }
9492                         need_trans = 0;
9493                         if (no_more_keys)
9494                                 break;
9495                         goto again;
9496                 }
9497
9498                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9499
9500                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9501                         goto next;
9502                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9503                         goto next;
9504
9505                 ret = maybe_repair_root_item(info, path, &found_key,
9506                                              trans ? 0 : 1);
9507                 if (ret < 0)
9508                         goto out;
9509                 if (ret) {
9510                         if (!trans && repair) {
9511                                 need_trans = 1;
9512                                 key = found_key;
9513                                 btrfs_release_path(path);
9514                                 goto again;
9515                         }
9516                         bad_roots++;
9517                 }
9518 next:
9519                 path->slots[0]++;
9520         }
9521         ret = 0;
9522 out:
9523         free_roots_info_cache();
9524         btrfs_free_path(path);
9525         if (trans)
9526                 btrfs_commit_transaction(trans, info->tree_root);
9527         if (ret < 0)
9528                 return ret;
9529
9530         return bad_roots;
9531 }
9532
9533 const char * const cmd_check_usage[] = {
9534         "btrfs check [options] <device>",
9535         "Check structural integrity of a filesystem (unmounted).",
9536         "Check structural integrity of an unmounted filesystem. Verify internal",
9537         "trees' consistency and item connectivity. In the repair mode try to",
9538         "fix the problems found.",
9539         "WARNING: the repair mode is considered dangerous",
9540         "",
9541         "-s|--super <superblock>     use this superblock copy",
9542         "-b|--backup                 use the first valid backup root copy",
9543         "--repair                    try to repair the filesystem",
9544         "--readonly                  run in read-only mode (default)",
9545         "--init-csum-tree            create a new CRC tree",
9546         "--init-extent-tree          create a new extent tree",
9547         "--check-data-csum           verify checksums of data blocks",
9548         "-Q|--qgroup-report           print a report on qgroup consistency",
9549         "-E|--subvol-extents <subvolid>",
9550         "                            print subvolume extents and sharing state",
9551         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9552         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9553         "-p|--progress               indicate progress",
9554         NULL
9555 };
9556
9557 int cmd_check(int argc, char **argv)
9558 {
9559         struct cache_tree root_cache;
9560         struct btrfs_root *root;
9561         struct btrfs_fs_info *info;
9562         u64 bytenr = 0;
9563         u64 subvolid = 0;
9564         u64 tree_root_bytenr = 0;
9565         u64 chunk_root_bytenr = 0;
9566         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9567         int ret;
9568         u64 num;
9569         int init_csum_tree = 0;
9570         int readonly = 0;
9571         int qgroup_report = 0;
9572         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9573
9574         while(1) {
9575                 int c;
9576                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9577                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9578                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9579                 static const struct option long_options[] = {
9580                         { "super", required_argument, NULL, 's' },
9581                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9582                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9583                         { "init-csum-tree", no_argument, NULL,
9584                                 GETOPT_VAL_INIT_CSUM },
9585                         { "init-extent-tree", no_argument, NULL,
9586                                 GETOPT_VAL_INIT_EXTENT },
9587                         { "check-data-csum", no_argument, NULL,
9588                                 GETOPT_VAL_CHECK_CSUM },
9589                         { "backup", no_argument, NULL, 'b' },
9590                         { "subvol-extents", required_argument, NULL, 'E' },
9591                         { "qgroup-report", no_argument, NULL, 'Q' },
9592                         { "tree-root", required_argument, NULL, 'r' },
9593                         { "chunk-root", required_argument, NULL,
9594                                 GETOPT_VAL_CHUNK_TREE },
9595                         { "progress", no_argument, NULL, 'p' },
9596                         { NULL, 0, NULL, 0}
9597                 };
9598
9599                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9600                 if (c < 0)
9601                         break;
9602                 switch(c) {
9603                         case 'a': /* ignored */ break;
9604                         case 'b':
9605                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9606                                 break;
9607                         case 's':
9608                                 num = arg_strtou64(optarg);
9609                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9610                                         fprintf(stderr,
9611                                                 "ERROR: super mirror should be less than: %d\n",
9612                                                 BTRFS_SUPER_MIRROR_MAX);
9613                                         exit(1);
9614                                 }
9615                                 bytenr = btrfs_sb_offset(((int)num));
9616                                 printf("using SB copy %llu, bytenr %llu\n", num,
9617                                        (unsigned long long)bytenr);
9618                                 break;
9619                         case 'Q':
9620                                 qgroup_report = 1;
9621                                 break;
9622                         case 'E':
9623                                 subvolid = arg_strtou64(optarg);
9624                                 break;
9625                         case 'r':
9626                                 tree_root_bytenr = arg_strtou64(optarg);
9627                                 break;
9628                         case GETOPT_VAL_CHUNK_TREE:
9629                                 chunk_root_bytenr = arg_strtou64(optarg);
9630                                 break;
9631                         case 'p':
9632                                 ctx.progress_enabled = true;
9633                                 break;
9634                         case '?':
9635                         case 'h':
9636                                 usage(cmd_check_usage);
9637                         case GETOPT_VAL_REPAIR:
9638                                 printf("enabling repair mode\n");
9639                                 repair = 1;
9640                                 ctree_flags |= OPEN_CTREE_WRITES;
9641                                 break;
9642                         case GETOPT_VAL_READONLY:
9643                                 readonly = 1;
9644                                 break;
9645                         case GETOPT_VAL_INIT_CSUM:
9646                                 printf("Creating a new CRC tree\n");
9647                                 init_csum_tree = 1;
9648                                 repair = 1;
9649                                 ctree_flags |= OPEN_CTREE_WRITES;
9650                                 break;
9651                         case GETOPT_VAL_INIT_EXTENT:
9652                                 init_extent_tree = 1;
9653                                 ctree_flags |= (OPEN_CTREE_WRITES |
9654                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9655                                 repair = 1;
9656                                 break;
9657                         case GETOPT_VAL_CHECK_CSUM:
9658                                 check_data_csum = 1;
9659                                 break;
9660                 }
9661         }
9662
9663         if (check_argc_exact(argc - optind, 1))
9664                 usage(cmd_check_usage);
9665
9666         if (ctx.progress_enabled) {
9667                 ctx.tp = TASK_NOTHING;
9668                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9669         }
9670
9671         /* This check is the only reason for --readonly to exist */
9672         if (readonly && repair) {
9673                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9674                 exit(1);
9675         }
9676
9677         radix_tree_init();
9678         cache_tree_init(&root_cache);
9679
9680         if((ret = check_mounted(argv[optind])) < 0) {
9681                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9682                 goto err_out;
9683         } else if(ret) {
9684                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9685                 ret = -EBUSY;
9686                 goto err_out;
9687         }
9688
9689         /* only allow partial opening under repair mode */
9690         if (repair)
9691                 ctree_flags |= OPEN_CTREE_PARTIAL;
9692
9693         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9694                                   chunk_root_bytenr, ctree_flags);
9695         if (!info) {
9696                 fprintf(stderr, "Couldn't open file system\n");
9697                 ret = -EIO;
9698                 goto err_out;
9699         }
9700
9701         global_info = info;
9702         root = info->fs_root;
9703
9704         /*
9705          * repair mode will force us to commit transaction which
9706          * will make us fail to load log tree when mounting.
9707          */
9708         if (repair && btrfs_super_log_root(info->super_copy)) {
9709                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9710                 if (!ret) {
9711                         ret = 1;
9712                         goto close_out;
9713                 }
9714                 ret = zero_log_tree(root);
9715                 if (ret) {
9716                         fprintf(stderr, "fail to zero log tree\n");
9717                         goto close_out;
9718                 }
9719         }
9720
9721         uuid_unparse(info->super_copy->fsid, uuidbuf);
9722         if (qgroup_report) {
9723                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9724                        uuidbuf);
9725                 ret = qgroup_verify_all(info);
9726                 if (ret == 0)
9727                         ret = report_qgroups(1);
9728                 goto close_out;
9729         }
9730         if (subvolid) {
9731                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9732                        subvolid, argv[optind], uuidbuf);
9733                 ret = print_extent_state(info, subvolid);
9734                 goto close_out;
9735         }
9736         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9737
9738         if (!extent_buffer_uptodate(info->tree_root->node) ||
9739             !extent_buffer_uptodate(info->dev_root->node) ||
9740             !extent_buffer_uptodate(info->chunk_root->node)) {
9741                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9742                 ret = -EIO;
9743                 goto close_out;
9744         }
9745
9746         if (init_extent_tree || init_csum_tree) {
9747                 struct btrfs_trans_handle *trans;
9748
9749                 trans = btrfs_start_transaction(info->extent_root, 0);
9750                 if (IS_ERR(trans)) {
9751                         fprintf(stderr, "Error starting transaction\n");
9752                         ret = PTR_ERR(trans);
9753                         goto close_out;
9754                 }
9755
9756                 if (init_extent_tree) {
9757                         printf("Creating a new extent tree\n");
9758                         ret = reinit_extent_tree(trans, info);
9759                         if (ret)
9760                                 goto close_out;
9761                 }
9762
9763                 if (init_csum_tree) {
9764                         fprintf(stderr, "Reinit crc root\n");
9765                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9766                         if (ret) {
9767                                 fprintf(stderr, "crc root initialization failed\n");
9768                                 ret = -EIO;
9769                                 goto close_out;
9770                         }
9771
9772                         ret = fill_csum_tree(trans, info->csum_root,
9773                                              init_extent_tree);
9774                         if (ret) {
9775                                 fprintf(stderr, "crc refilling failed\n");
9776                                 return -EIO;
9777                         }
9778                 }
9779                 /*
9780                  * Ok now we commit and run the normal fsck, which will add
9781                  * extent entries for all of the items it finds.
9782                  */
9783                 ret = btrfs_commit_transaction(trans, info->extent_root);
9784                 if (ret)
9785                         goto close_out;
9786         }
9787         if (!extent_buffer_uptodate(info->extent_root->node)) {
9788                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9789                 ret = -EIO;
9790                 goto close_out;
9791         }
9792         if (!extent_buffer_uptodate(info->csum_root->node)) {
9793                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9794                 ret = -EIO;
9795                 goto close_out;
9796         }
9797
9798         if (!ctx.progress_enabled)
9799                 fprintf(stderr, "checking extents\n");
9800         ret = check_chunks_and_extents(root);
9801         if (ret)
9802                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9803
9804         ret = repair_root_items(info);
9805         if (ret < 0)
9806                 goto close_out;
9807         if (repair) {
9808                 fprintf(stderr, "Fixed %d roots.\n", ret);
9809                 ret = 0;
9810         } else if (ret > 0) {
9811                 fprintf(stderr,
9812                        "Found %d roots with an outdated root item.\n",
9813                        ret);
9814                 fprintf(stderr,
9815                         "Please run a filesystem check with the option --repair to fix them.\n");
9816                 ret = 1;
9817                 goto close_out;
9818         }
9819
9820         if (!ctx.progress_enabled) {
9821                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9822                         fprintf(stderr, "checking free space tree\n");
9823                 else
9824                         fprintf(stderr, "checking free space cache\n");
9825         }
9826         ret = check_space_cache(root);
9827         if (ret)
9828                 goto out;
9829
9830         /*
9831          * We used to have to have these hole extents in between our real
9832          * extents so if we don't have this flag set we need to make sure there
9833          * are no gaps in the file extents for inodes, otherwise we can just
9834          * ignore it when this happens.
9835          */
9836         no_holes = btrfs_fs_incompat(root->fs_info,
9837                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9838         if (!ctx.progress_enabled)
9839                 fprintf(stderr, "checking fs roots\n");
9840         ret = check_fs_roots(root, &root_cache);
9841         if (ret)
9842                 goto out;
9843
9844         fprintf(stderr, "checking csums\n");
9845         ret = check_csums(root);
9846         if (ret)
9847                 goto out;
9848
9849         fprintf(stderr, "checking root refs\n");
9850         ret = check_root_refs(root, &root_cache);
9851         if (ret)
9852                 goto out;
9853
9854         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9855                 struct extent_buffer *eb;
9856
9857                 eb = list_first_entry(&root->fs_info->recow_ebs,
9858                                       struct extent_buffer, recow);
9859                 list_del_init(&eb->recow);
9860                 ret = recow_extent_buffer(root, eb);
9861                 if (ret)
9862                         break;
9863         }
9864
9865         while (!list_empty(&delete_items)) {
9866                 struct bad_item *bad;
9867
9868                 bad = list_first_entry(&delete_items, struct bad_item, list);
9869                 list_del_init(&bad->list);
9870                 if (repair)
9871                         ret = delete_bad_item(root, bad);
9872                 free(bad);
9873         }
9874
9875         if (info->quota_enabled) {
9876                 int err;
9877                 fprintf(stderr, "checking quota groups\n");
9878                 err = qgroup_verify_all(info);
9879                 if (err)
9880                         goto out;
9881         }
9882
9883         if (!list_empty(&root->fs_info->recow_ebs)) {
9884                 fprintf(stderr, "Transid errors in file system\n");
9885                 ret = 1;
9886         }
9887 out:
9888         /* Don't override original ret */
9889         if (ret)
9890                 report_qgroups(0);
9891         else
9892                 ret = report_qgroups(0);
9893         if (found_old_backref) { /*
9894                  * there was a disk format change when mixed
9895                  * backref was in testing tree. The old format
9896                  * existed about one week.
9897                  */
9898                 printf("\n * Found old mixed backref format. "
9899                        "The old format is not supported! *"
9900                        "\n * Please mount the FS in readonly mode, "
9901                        "backup data and re-format the FS. *\n\n");
9902                 ret = 1;
9903         }
9904         printf("found %llu bytes used err is %d\n",
9905                (unsigned long long)bytes_used, ret);
9906         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9907         printf("total tree bytes: %llu\n",
9908                (unsigned long long)total_btree_bytes);
9909         printf("total fs tree bytes: %llu\n",
9910                (unsigned long long)total_fs_tree_bytes);
9911         printf("total extent tree bytes: %llu\n",
9912                (unsigned long long)total_extent_tree_bytes);
9913         printf("btree space waste bytes: %llu\n",
9914                (unsigned long long)btree_space_waste);
9915         printf("file data blocks allocated: %llu\n referenced %llu\n",
9916                 (unsigned long long)data_bytes_allocated,
9917                 (unsigned long long)data_bytes_referenced);
9918
9919         free_qgroup_counts();
9920         free_root_recs_tree(&root_cache);
9921 close_out:
9922         close_ctree(root);
9923 err_out:
9924         if (ctx.progress_enabled)
9925                 task_deinit(ctx.info);
9926
9927         return ret;
9928 }