btrfs-progs: check: simplify assignments in add_extent_rec_nolookup
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int repair = 0;
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
78 struct extent_backref {
79         struct list_head list;
80         unsigned int is_data:1;
81         unsigned int found_extent_tree:1;
82         unsigned int full_backref:1;
83         unsigned int found_ref:1;
84         unsigned int broken:1;
85 };
86
87 struct data_backref {
88         struct extent_backref node;
89         union {
90                 u64 parent;
91                 u64 root;
92         };
93         u64 owner;
94         u64 offset;
95         u64 disk_bytenr;
96         u64 bytes;
97         u64 ram_bytes;
98         u32 num_refs;
99         u32 found_ref;
100 };
101
102 /*
103  * Much like data_backref, just removed the undetermined members
104  * and change it to use list_head.
105  * During extent scan, it is stored in root->orphan_data_extent.
106  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
107  */
108 struct orphan_data_extent {
109         struct list_head list;
110         u64 root;
111         u64 objectid;
112         u64 offset;
113         u64 disk_bytenr;
114         u64 disk_len;
115 };
116
117 struct tree_backref {
118         struct extent_backref node;
119         union {
120                 u64 parent;
121                 u64 root;
122         };
123 };
124
125 struct extent_record {
126         struct list_head backrefs;
127         struct list_head dups;
128         struct list_head list;
129         struct cache_extent cache;
130         struct btrfs_disk_key parent_key;
131         u64 start;
132         u64 max_size;
133         u64 nr;
134         u64 refs;
135         u64 extent_item_refs;
136         u64 generation;
137         u64 parent_generation;
138         u64 info_objectid;
139         u32 num_duplicates;
140         u8 info_level;
141         int flag_block_full_backref;
142         unsigned int found_rec:1;
143         unsigned int content_checked:1;
144         unsigned int owner_ref_checked:1;
145         unsigned int is_root:1;
146         unsigned int metadata:1;
147         unsigned int bad_full_backref:1;
148         unsigned int crossing_stripes:1;
149         unsigned int wrong_chunk_type:1;
150 };
151
152 struct inode_backref {
153         struct list_head list;
154         unsigned int found_dir_item:1;
155         unsigned int found_dir_index:1;
156         unsigned int found_inode_ref:1;
157         unsigned int filetype:8;
158         int errors;
159         unsigned int ref_type;
160         u64 dir;
161         u64 index;
162         u16 namelen;
163         char name[0];
164 };
165
166 struct root_item_record {
167         struct list_head list;
168         u64 objectid;
169         u64 bytenr;
170         u64 last_snapshot;
171         u8 level;
172         u8 drop_level;
173         int level_size;
174         struct btrfs_key drop_key;
175 };
176
177 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
178 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
179 #define REF_ERR_NO_INODE_REF            (1 << 2)
180 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
181 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
182 #define REF_ERR_DUP_INODE_REF           (1 << 5)
183 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
184 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
185 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
186 #define REF_ERR_NO_ROOT_REF             (1 << 9)
187 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
188 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
189 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
190
191 struct file_extent_hole {
192         struct rb_node node;
193         u64 start;
194         u64 len;
195 };
196
197 struct inode_record {
198         struct list_head backrefs;
199         unsigned int checked:1;
200         unsigned int merging:1;
201         unsigned int found_inode_item:1;
202         unsigned int found_dir_item:1;
203         unsigned int found_file_extent:1;
204         unsigned int found_csum_item:1;
205         unsigned int some_csum_missing:1;
206         unsigned int nodatasum:1;
207         int errors;
208
209         u64 ino;
210         u32 nlink;
211         u32 imode;
212         u64 isize;
213         u64 nbytes;
214
215         u32 found_link;
216         u64 found_size;
217         u64 extent_start;
218         u64 extent_end;
219         struct rb_root holes;
220         struct list_head orphan_extents;
221
222         u32 refs;
223 };
224
225 #define I_ERR_NO_INODE_ITEM             (1 << 0)
226 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
227 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
228 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
229 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
230 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
231 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
232 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
233 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
234 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
235 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
236 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
237 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
238 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
239 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
240
241 struct root_backref {
242         struct list_head list;
243         unsigned int found_dir_item:1;
244         unsigned int found_dir_index:1;
245         unsigned int found_back_ref:1;
246         unsigned int found_forward_ref:1;
247         unsigned int reachable:1;
248         int errors;
249         u64 ref_root;
250         u64 dir;
251         u64 index;
252         u16 namelen;
253         char name[0];
254 };
255
256 struct root_record {
257         struct list_head backrefs;
258         struct cache_extent cache;
259         unsigned int found_root_item:1;
260         u64 objectid;
261         u32 found_ref;
262 };
263
264 struct ptr_node {
265         struct cache_extent cache;
266         void *data;
267 };
268
269 struct shared_node {
270         struct cache_extent cache;
271         struct cache_tree root_cache;
272         struct cache_tree inode_cache;
273         struct inode_record *current;
274         u32 refs;
275 };
276
277 struct block_info {
278         u64 start;
279         u32 size;
280 };
281
282 struct walk_control {
283         struct cache_tree shared;
284         struct shared_node *nodes[BTRFS_MAX_LEVEL];
285         int active_node;
286         int root_level;
287 };
288
289 struct bad_item {
290         struct btrfs_key key;
291         u64 root_id;
292         struct list_head list;
293 };
294
295 struct extent_entry {
296         u64 bytenr;
297         u64 bytes;
298         int count;
299         int broken;
300         struct list_head list;
301 };
302
303 struct root_item_info {
304         /* level of the root */
305         u8 level;
306         /* number of nodes at this level, must be 1 for a root */
307         int node_count;
308         u64 bytenr;
309         u64 gen;
310         struct cache_extent cache_extent;
311 };
312
313 static void *print_status_check(void *p)
314 {
315         struct task_ctx *priv = p;
316         const char work_indicator[] = { '.', 'o', 'O', 'o' };
317         uint32_t count = 0;
318         static char *task_position_string[] = {
319                 "checking extents",
320                 "checking free space cache",
321                 "checking fs roots",
322         };
323
324         task_period_start(priv->info, 1000 /* 1s */);
325
326         if (priv->tp == TASK_NOTHING)
327                 return NULL;
328
329         while (1) {
330                 printf("%s [%c]\r", task_position_string[priv->tp],
331                                 work_indicator[count % 4]);
332                 count++;
333                 fflush(stdout);
334                 task_period_wait(priv->info);
335         }
336         return NULL;
337 }
338
339 static int print_status_return(void *p)
340 {
341         printf("\n");
342         fflush(stdout);
343
344         return 0;
345 }
346
347 /* Compatible function to allow reuse of old codes */
348 static u64 first_extent_gap(struct rb_root *holes)
349 {
350         struct file_extent_hole *hole;
351
352         if (RB_EMPTY_ROOT(holes))
353                 return (u64)-1;
354
355         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
356         return hole->start;
357 }
358
359 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
360 {
361         struct file_extent_hole *hole1;
362         struct file_extent_hole *hole2;
363
364         hole1 = rb_entry(node1, struct file_extent_hole, node);
365         hole2 = rb_entry(node2, struct file_extent_hole, node);
366
367         if (hole1->start > hole2->start)
368                 return -1;
369         if (hole1->start < hole2->start)
370                 return 1;
371         /* Now hole1->start == hole2->start */
372         if (hole1->len >= hole2->len)
373                 /*
374                  * Hole 1 will be merge center
375                  * Same hole will be merged later
376                  */
377                 return -1;
378         /* Hole 2 will be merge center */
379         return 1;
380 }
381
382 /*
383  * Add a hole to the record
384  *
385  * This will do hole merge for copy_file_extent_holes(),
386  * which will ensure there won't be continuous holes.
387  */
388 static int add_file_extent_hole(struct rb_root *holes,
389                                 u64 start, u64 len)
390 {
391         struct file_extent_hole *hole;
392         struct file_extent_hole *prev = NULL;
393         struct file_extent_hole *next = NULL;
394
395         hole = malloc(sizeof(*hole));
396         if (!hole)
397                 return -ENOMEM;
398         hole->start = start;
399         hole->len = len;
400         /* Since compare will not return 0, no -EEXIST will happen */
401         rb_insert(holes, &hole->node, compare_hole);
402
403         /* simple merge with previous hole */
404         if (rb_prev(&hole->node))
405                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
406                                 node);
407         if (prev && prev->start + prev->len >= hole->start) {
408                 hole->len = hole->start + hole->len - prev->start;
409                 hole->start = prev->start;
410                 rb_erase(&prev->node, holes);
411                 free(prev);
412                 prev = NULL;
413         }
414
415         /* iterate merge with next holes */
416         while (1) {
417                 if (!rb_next(&hole->node))
418                         break;
419                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
420                                         node);
421                 if (hole->start + hole->len >= next->start) {
422                         if (hole->start + hole->len <= next->start + next->len)
423                                 hole->len = next->start + next->len -
424                                             hole->start;
425                         rb_erase(&next->node, holes);
426                         free(next);
427                         next = NULL;
428                 } else
429                         break;
430         }
431         return 0;
432 }
433
434 static int compare_hole_range(struct rb_node *node, void *data)
435 {
436         struct file_extent_hole *hole;
437         u64 start;
438
439         hole = (struct file_extent_hole *)data;
440         start = hole->start;
441
442         hole = rb_entry(node, struct file_extent_hole, node);
443         if (start < hole->start)
444                 return -1;
445         if (start >= hole->start && start < hole->start + hole->len)
446                 return 0;
447         return 1;
448 }
449
450 /*
451  * Delete a hole in the record
452  *
453  * This will do the hole split and is much restrict than add.
454  */
455 static int del_file_extent_hole(struct rb_root *holes,
456                                 u64 start, u64 len)
457 {
458         struct file_extent_hole *hole;
459         struct file_extent_hole tmp;
460         u64 prev_start = 0;
461         u64 prev_len = 0;
462         u64 next_start = 0;
463         u64 next_len = 0;
464         struct rb_node *node;
465         int have_prev = 0;
466         int have_next = 0;
467         int ret = 0;
468
469         tmp.start = start;
470         tmp.len = len;
471         node = rb_search(holes, &tmp, compare_hole_range, NULL);
472         if (!node)
473                 return -EEXIST;
474         hole = rb_entry(node, struct file_extent_hole, node);
475         if (start + len > hole->start + hole->len)
476                 return -EEXIST;
477
478         /*
479          * Now there will be no overflap, delete the hole and re-add the
480          * split(s) if they exists.
481          */
482         if (start > hole->start) {
483                 prev_start = hole->start;
484                 prev_len = start - hole->start;
485                 have_prev = 1;
486         }
487         if (hole->start + hole->len > start + len) {
488                 next_start = start + len;
489                 next_len = hole->start + hole->len - start - len;
490                 have_next = 1;
491         }
492         rb_erase(node, holes);
493         free(hole);
494         if (have_prev) {
495                 ret = add_file_extent_hole(holes, prev_start, prev_len);
496                 if (ret < 0)
497                         return ret;
498         }
499         if (have_next) {
500                 ret = add_file_extent_hole(holes, next_start, next_len);
501                 if (ret < 0)
502                         return ret;
503         }
504         return 0;
505 }
506
507 static int copy_file_extent_holes(struct rb_root *dst,
508                                   struct rb_root *src)
509 {
510         struct file_extent_hole *hole;
511         struct rb_node *node;
512         int ret = 0;
513
514         node = rb_first(src);
515         while (node) {
516                 hole = rb_entry(node, struct file_extent_hole, node);
517                 ret = add_file_extent_hole(dst, hole->start, hole->len);
518                 if (ret)
519                         break;
520                 node = rb_next(node);
521         }
522         return ret;
523 }
524
525 static void free_file_extent_holes(struct rb_root *holes)
526 {
527         struct rb_node *node;
528         struct file_extent_hole *hole;
529
530         node = rb_first(holes);
531         while (node) {
532                 hole = rb_entry(node, struct file_extent_hole, node);
533                 rb_erase(node, holes);
534                 free(hole);
535                 node = rb_first(holes);
536         }
537 }
538
539 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
540
541 static void record_root_in_trans(struct btrfs_trans_handle *trans,
542                                  struct btrfs_root *root)
543 {
544         if (root->last_trans != trans->transid) {
545                 root->track_dirty = 1;
546                 root->last_trans = trans->transid;
547                 root->commit_root = root->node;
548                 extent_buffer_get(root->node);
549         }
550 }
551
552 static u8 imode_to_type(u32 imode)
553 {
554 #define S_SHIFT 12
555         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
556                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
557                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
558                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
559                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
560                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
561                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
562                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
563         };
564
565         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
566 #undef S_SHIFT
567 }
568
569 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
570 {
571         struct device_record *rec1;
572         struct device_record *rec2;
573
574         rec1 = rb_entry(node1, struct device_record, node);
575         rec2 = rb_entry(node2, struct device_record, node);
576         if (rec1->devid > rec2->devid)
577                 return -1;
578         else if (rec1->devid < rec2->devid)
579                 return 1;
580         else
581                 return 0;
582 }
583
584 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
585 {
586         struct inode_record *rec;
587         struct inode_backref *backref;
588         struct inode_backref *orig;
589         struct inode_backref *tmp;
590         struct orphan_data_extent *src_orphan;
591         struct orphan_data_extent *dst_orphan;
592         size_t size;
593         int ret;
594
595         rec = malloc(sizeof(*rec));
596         if (!rec)
597                 return ERR_PTR(-ENOMEM);
598         memcpy(rec, orig_rec, sizeof(*rec));
599         rec->refs = 1;
600         INIT_LIST_HEAD(&rec->backrefs);
601         INIT_LIST_HEAD(&rec->orphan_extents);
602         rec->holes = RB_ROOT;
603
604         list_for_each_entry(orig, &orig_rec->backrefs, list) {
605                 size = sizeof(*orig) + orig->namelen + 1;
606                 backref = malloc(size);
607                 if (!backref) {
608                         ret = -ENOMEM;
609                         goto cleanup;
610                 }
611                 memcpy(backref, orig, size);
612                 list_add_tail(&backref->list, &rec->backrefs);
613         }
614         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
615                 dst_orphan = malloc(sizeof(*dst_orphan));
616                 if (!dst_orphan) {
617                         ret = -ENOMEM;
618                         goto cleanup;
619                 }
620                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
621                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
622         }
623         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
624         BUG_ON(ret < 0);
625
626         return rec;
627
628 cleanup:
629         if (!list_empty(&rec->backrefs))
630                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
631                         list_del(&orig->list);
632                         free(orig);
633                 }
634
635         if (!list_empty(&rec->orphan_extents))
636                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
637                         list_del(&orig->list);
638                         free(orig);
639                 }
640
641         free(rec);
642
643         return ERR_PTR(ret);
644 }
645
646 static void print_orphan_data_extents(struct list_head *orphan_extents,
647                                       u64 objectid)
648 {
649         struct orphan_data_extent *orphan;
650
651         if (list_empty(orphan_extents))
652                 return;
653         printf("The following data extent is lost in tree %llu:\n",
654                objectid);
655         list_for_each_entry(orphan, orphan_extents, list) {
656                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
657                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
658                        orphan->disk_len);
659         }
660 }
661
662 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
663 {
664         u64 root_objectid = root->root_key.objectid;
665         int errors = rec->errors;
666
667         if (!errors)
668                 return;
669         /* reloc root errors, we print its corresponding fs root objectid*/
670         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
671                 root_objectid = root->root_key.offset;
672                 fprintf(stderr, "reloc");
673         }
674         fprintf(stderr, "root %llu inode %llu errors %x",
675                 (unsigned long long) root_objectid,
676                 (unsigned long long) rec->ino, rec->errors);
677
678         if (errors & I_ERR_NO_INODE_ITEM)
679                 fprintf(stderr, ", no inode item");
680         if (errors & I_ERR_NO_ORPHAN_ITEM)
681                 fprintf(stderr, ", no orphan item");
682         if (errors & I_ERR_DUP_INODE_ITEM)
683                 fprintf(stderr, ", dup inode item");
684         if (errors & I_ERR_DUP_DIR_INDEX)
685                 fprintf(stderr, ", dup dir index");
686         if (errors & I_ERR_ODD_DIR_ITEM)
687                 fprintf(stderr, ", odd dir item");
688         if (errors & I_ERR_ODD_FILE_EXTENT)
689                 fprintf(stderr, ", odd file extent");
690         if (errors & I_ERR_BAD_FILE_EXTENT)
691                 fprintf(stderr, ", bad file extent");
692         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
693                 fprintf(stderr, ", file extent overlap");
694         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
695                 fprintf(stderr, ", file extent discount");
696         if (errors & I_ERR_DIR_ISIZE_WRONG)
697                 fprintf(stderr, ", dir isize wrong");
698         if (errors & I_ERR_FILE_NBYTES_WRONG)
699                 fprintf(stderr, ", nbytes wrong");
700         if (errors & I_ERR_ODD_CSUM_ITEM)
701                 fprintf(stderr, ", odd csum item");
702         if (errors & I_ERR_SOME_CSUM_MISSING)
703                 fprintf(stderr, ", some csum missing");
704         if (errors & I_ERR_LINK_COUNT_WRONG)
705                 fprintf(stderr, ", link count wrong");
706         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
707                 fprintf(stderr, ", orphan file extent");
708         fprintf(stderr, "\n");
709         /* Print the orphan extents if needed */
710         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
711                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
712
713         /* Print the holes if needed */
714         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
715                 struct file_extent_hole *hole;
716                 struct rb_node *node;
717                 int found = 0;
718
719                 node = rb_first(&rec->holes);
720                 fprintf(stderr, "Found file extent holes:\n");
721                 while (node) {
722                         found = 1;
723                         hole = rb_entry(node, struct file_extent_hole, node);
724                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
725                                 hole->start, hole->len);
726                         node = rb_next(node);
727                 }
728                 if (!found)
729                         fprintf(stderr, "\tstart: 0, len: %llu\n",
730                                 round_up(rec->isize, root->sectorsize));
731         }
732 }
733
734 static void print_ref_error(int errors)
735 {
736         if (errors & REF_ERR_NO_DIR_ITEM)
737                 fprintf(stderr, ", no dir item");
738         if (errors & REF_ERR_NO_DIR_INDEX)
739                 fprintf(stderr, ", no dir index");
740         if (errors & REF_ERR_NO_INODE_REF)
741                 fprintf(stderr, ", no inode ref");
742         if (errors & REF_ERR_DUP_DIR_ITEM)
743                 fprintf(stderr, ", dup dir item");
744         if (errors & REF_ERR_DUP_DIR_INDEX)
745                 fprintf(stderr, ", dup dir index");
746         if (errors & REF_ERR_DUP_INODE_REF)
747                 fprintf(stderr, ", dup inode ref");
748         if (errors & REF_ERR_INDEX_UNMATCH)
749                 fprintf(stderr, ", index unmatch");
750         if (errors & REF_ERR_FILETYPE_UNMATCH)
751                 fprintf(stderr, ", filetype unmatch");
752         if (errors & REF_ERR_NAME_TOO_LONG)
753                 fprintf(stderr, ", name too long");
754         if (errors & REF_ERR_NO_ROOT_REF)
755                 fprintf(stderr, ", no root ref");
756         if (errors & REF_ERR_NO_ROOT_BACKREF)
757                 fprintf(stderr, ", no root backref");
758         if (errors & REF_ERR_DUP_ROOT_REF)
759                 fprintf(stderr, ", dup root ref");
760         if (errors & REF_ERR_DUP_ROOT_BACKREF)
761                 fprintf(stderr, ", dup root backref");
762         fprintf(stderr, "\n");
763 }
764
765 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
766                                           u64 ino, int mod)
767 {
768         struct ptr_node *node;
769         struct cache_extent *cache;
770         struct inode_record *rec = NULL;
771         int ret;
772
773         cache = lookup_cache_extent(inode_cache, ino, 1);
774         if (cache) {
775                 node = container_of(cache, struct ptr_node, cache);
776                 rec = node->data;
777                 if (mod && rec->refs > 1) {
778                         node->data = clone_inode_rec(rec);
779                         if (IS_ERR(node->data))
780                                 return node->data;
781                         rec->refs--;
782                         rec = node->data;
783                 }
784         } else if (mod) {
785                 rec = calloc(1, sizeof(*rec));
786                 if (!rec)
787                         return ERR_PTR(-ENOMEM);
788                 rec->ino = ino;
789                 rec->extent_start = (u64)-1;
790                 rec->refs = 1;
791                 INIT_LIST_HEAD(&rec->backrefs);
792                 INIT_LIST_HEAD(&rec->orphan_extents);
793                 rec->holes = RB_ROOT;
794
795                 node = malloc(sizeof(*node));
796                 if (!node) {
797                         free(rec);
798                         return ERR_PTR(-ENOMEM);
799                 }
800                 node->cache.start = ino;
801                 node->cache.size = 1;
802                 node->data = rec;
803
804                 if (ino == BTRFS_FREE_INO_OBJECTID)
805                         rec->found_link = 1;
806
807                 ret = insert_cache_extent(inode_cache, &node->cache);
808                 if (ret)
809                         return ERR_PTR(-EEXIST);
810         }
811         return rec;
812 }
813
814 static void free_orphan_data_extents(struct list_head *orphan_extents)
815 {
816         struct orphan_data_extent *orphan;
817
818         while (!list_empty(orphan_extents)) {
819                 orphan = list_entry(orphan_extents->next,
820                                     struct orphan_data_extent, list);
821                 list_del(&orphan->list);
822                 free(orphan);
823         }
824 }
825
826 static void free_inode_rec(struct inode_record *rec)
827 {
828         struct inode_backref *backref;
829
830         if (--rec->refs > 0)
831                 return;
832
833         while (!list_empty(&rec->backrefs)) {
834                 backref = list_entry(rec->backrefs.next,
835                                      struct inode_backref, list);
836                 list_del(&backref->list);
837                 free(backref);
838         }
839         free_orphan_data_extents(&rec->orphan_extents);
840         free_file_extent_holes(&rec->holes);
841         free(rec);
842 }
843
844 static int can_free_inode_rec(struct inode_record *rec)
845 {
846         if (!rec->errors && rec->checked && rec->found_inode_item &&
847             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
848                 return 1;
849         return 0;
850 }
851
852 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
853                                  struct inode_record *rec)
854 {
855         struct cache_extent *cache;
856         struct inode_backref *tmp, *backref;
857         struct ptr_node *node;
858         unsigned char filetype;
859
860         if (!rec->found_inode_item)
861                 return;
862
863         filetype = imode_to_type(rec->imode);
864         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
865                 if (backref->found_dir_item && backref->found_dir_index) {
866                         if (backref->filetype != filetype)
867                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
868                         if (!backref->errors && backref->found_inode_ref &&
869                             rec->nlink == rec->found_link) {
870                                 list_del(&backref->list);
871                                 free(backref);
872                         }
873                 }
874         }
875
876         if (!rec->checked || rec->merging)
877                 return;
878
879         if (S_ISDIR(rec->imode)) {
880                 if (rec->found_size != rec->isize)
881                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
882                 if (rec->found_file_extent)
883                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
884         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
885                 if (rec->found_dir_item)
886                         rec->errors |= I_ERR_ODD_DIR_ITEM;
887                 if (rec->found_size != rec->nbytes)
888                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
889                 if (rec->nlink > 0 && !no_holes &&
890                     (rec->extent_end < rec->isize ||
891                      first_extent_gap(&rec->holes) < rec->isize))
892                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
893         }
894
895         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
896                 if (rec->found_csum_item && rec->nodatasum)
897                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
898                 if (rec->some_csum_missing && !rec->nodatasum)
899                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
900         }
901
902         BUG_ON(rec->refs != 1);
903         if (can_free_inode_rec(rec)) {
904                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
905                 node = container_of(cache, struct ptr_node, cache);
906                 BUG_ON(node->data != rec);
907                 remove_cache_extent(inode_cache, &node->cache);
908                 free(node);
909                 free_inode_rec(rec);
910         }
911 }
912
913 static int check_orphan_item(struct btrfs_root *root, u64 ino)
914 {
915         struct btrfs_path path;
916         struct btrfs_key key;
917         int ret;
918
919         key.objectid = BTRFS_ORPHAN_OBJECTID;
920         key.type = BTRFS_ORPHAN_ITEM_KEY;
921         key.offset = ino;
922
923         btrfs_init_path(&path);
924         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
925         btrfs_release_path(&path);
926         if (ret > 0)
927                 ret = -ENOENT;
928         return ret;
929 }
930
931 static int process_inode_item(struct extent_buffer *eb,
932                               int slot, struct btrfs_key *key,
933                               struct shared_node *active_node)
934 {
935         struct inode_record *rec;
936         struct btrfs_inode_item *item;
937
938         rec = active_node->current;
939         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
940         if (rec->found_inode_item) {
941                 rec->errors |= I_ERR_DUP_INODE_ITEM;
942                 return 1;
943         }
944         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
945         rec->nlink = btrfs_inode_nlink(eb, item);
946         rec->isize = btrfs_inode_size(eb, item);
947         rec->nbytes = btrfs_inode_nbytes(eb, item);
948         rec->imode = btrfs_inode_mode(eb, item);
949         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
950                 rec->nodatasum = 1;
951         rec->found_inode_item = 1;
952         if (rec->nlink == 0)
953                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
954         maybe_free_inode_rec(&active_node->inode_cache, rec);
955         return 0;
956 }
957
958 static struct inode_backref *get_inode_backref(struct inode_record *rec,
959                                                 const char *name,
960                                                 int namelen, u64 dir)
961 {
962         struct inode_backref *backref;
963
964         list_for_each_entry(backref, &rec->backrefs, list) {
965                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
966                         break;
967                 if (backref->dir != dir || backref->namelen != namelen)
968                         continue;
969                 if (memcmp(name, backref->name, namelen))
970                         continue;
971                 return backref;
972         }
973
974         backref = malloc(sizeof(*backref) + namelen + 1);
975         if (!backref)
976                 return NULL;
977         memset(backref, 0, sizeof(*backref));
978         backref->dir = dir;
979         backref->namelen = namelen;
980         memcpy(backref->name, name, namelen);
981         backref->name[namelen] = '\0';
982         list_add_tail(&backref->list, &rec->backrefs);
983         return backref;
984 }
985
986 static int add_inode_backref(struct cache_tree *inode_cache,
987                              u64 ino, u64 dir, u64 index,
988                              const char *name, int namelen,
989                              int filetype, int itemtype, int errors)
990 {
991         struct inode_record *rec;
992         struct inode_backref *backref;
993
994         rec = get_inode_rec(inode_cache, ino, 1);
995         BUG_ON(IS_ERR(rec));
996         backref = get_inode_backref(rec, name, namelen, dir);
997         BUG_ON(!backref);
998         if (errors)
999                 backref->errors |= errors;
1000         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1001                 if (backref->found_dir_index)
1002                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1003                 if (backref->found_inode_ref && backref->index != index)
1004                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1005                 if (backref->found_dir_item && backref->filetype != filetype)
1006                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1007
1008                 backref->index = index;
1009                 backref->filetype = filetype;
1010                 backref->found_dir_index = 1;
1011         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1012                 rec->found_link++;
1013                 if (backref->found_dir_item)
1014                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1015                 if (backref->found_dir_index && backref->filetype != filetype)
1016                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1017
1018                 backref->filetype = filetype;
1019                 backref->found_dir_item = 1;
1020         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1021                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1022                 if (backref->found_inode_ref)
1023                         backref->errors |= REF_ERR_DUP_INODE_REF;
1024                 if (backref->found_dir_index && backref->index != index)
1025                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1026                 else
1027                         backref->index = index;
1028
1029                 backref->ref_type = itemtype;
1030                 backref->found_inode_ref = 1;
1031         } else {
1032                 BUG_ON(1);
1033         }
1034
1035         maybe_free_inode_rec(inode_cache, rec);
1036         return 0;
1037 }
1038
1039 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1040                             struct cache_tree *dst_cache)
1041 {
1042         struct inode_backref *backref;
1043         u32 dir_count = 0;
1044         int ret = 0;
1045
1046         dst->merging = 1;
1047         list_for_each_entry(backref, &src->backrefs, list) {
1048                 if (backref->found_dir_index) {
1049                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1050                                         backref->index, backref->name,
1051                                         backref->namelen, backref->filetype,
1052                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1053                 }
1054                 if (backref->found_dir_item) {
1055                         dir_count++;
1056                         add_inode_backref(dst_cache, dst->ino,
1057                                         backref->dir, 0, backref->name,
1058                                         backref->namelen, backref->filetype,
1059                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1060                 }
1061                 if (backref->found_inode_ref) {
1062                         add_inode_backref(dst_cache, dst->ino,
1063                                         backref->dir, backref->index,
1064                                         backref->name, backref->namelen, 0,
1065                                         backref->ref_type, backref->errors);
1066                 }
1067         }
1068
1069         if (src->found_dir_item)
1070                 dst->found_dir_item = 1;
1071         if (src->found_file_extent)
1072                 dst->found_file_extent = 1;
1073         if (src->found_csum_item)
1074                 dst->found_csum_item = 1;
1075         if (src->some_csum_missing)
1076                 dst->some_csum_missing = 1;
1077         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1078                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1079                 if (ret < 0)
1080                         return ret;
1081         }
1082
1083         BUG_ON(src->found_link < dir_count);
1084         dst->found_link += src->found_link - dir_count;
1085         dst->found_size += src->found_size;
1086         if (src->extent_start != (u64)-1) {
1087                 if (dst->extent_start == (u64)-1) {
1088                         dst->extent_start = src->extent_start;
1089                         dst->extent_end = src->extent_end;
1090                 } else {
1091                         if (dst->extent_end > src->extent_start)
1092                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1093                         else if (dst->extent_end < src->extent_start) {
1094                                 ret = add_file_extent_hole(&dst->holes,
1095                                         dst->extent_end,
1096                                         src->extent_start - dst->extent_end);
1097                         }
1098                         if (dst->extent_end < src->extent_end)
1099                                 dst->extent_end = src->extent_end;
1100                 }
1101         }
1102
1103         dst->errors |= src->errors;
1104         if (src->found_inode_item) {
1105                 if (!dst->found_inode_item) {
1106                         dst->nlink = src->nlink;
1107                         dst->isize = src->isize;
1108                         dst->nbytes = src->nbytes;
1109                         dst->imode = src->imode;
1110                         dst->nodatasum = src->nodatasum;
1111                         dst->found_inode_item = 1;
1112                 } else {
1113                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1114                 }
1115         }
1116         dst->merging = 0;
1117
1118         return 0;
1119 }
1120
1121 static int splice_shared_node(struct shared_node *src_node,
1122                               struct shared_node *dst_node)
1123 {
1124         struct cache_extent *cache;
1125         struct ptr_node *node, *ins;
1126         struct cache_tree *src, *dst;
1127         struct inode_record *rec, *conflict;
1128         u64 current_ino = 0;
1129         int splice = 0;
1130         int ret;
1131
1132         if (--src_node->refs == 0)
1133                 splice = 1;
1134         if (src_node->current)
1135                 current_ino = src_node->current->ino;
1136
1137         src = &src_node->root_cache;
1138         dst = &dst_node->root_cache;
1139 again:
1140         cache = search_cache_extent(src, 0);
1141         while (cache) {
1142                 node = container_of(cache, struct ptr_node, cache);
1143                 rec = node->data;
1144                 cache = next_cache_extent(cache);
1145
1146                 if (splice) {
1147                         remove_cache_extent(src, &node->cache);
1148                         ins = node;
1149                 } else {
1150                         ins = malloc(sizeof(*ins));
1151                         BUG_ON(!ins);
1152                         ins->cache.start = node->cache.start;
1153                         ins->cache.size = node->cache.size;
1154                         ins->data = rec;
1155                         rec->refs++;
1156                 }
1157                 ret = insert_cache_extent(dst, &ins->cache);
1158                 if (ret == -EEXIST) {
1159                         conflict = get_inode_rec(dst, rec->ino, 1);
1160                         BUG_ON(IS_ERR(conflict));
1161                         merge_inode_recs(rec, conflict, dst);
1162                         if (rec->checked) {
1163                                 conflict->checked = 1;
1164                                 if (dst_node->current == conflict)
1165                                         dst_node->current = NULL;
1166                         }
1167                         maybe_free_inode_rec(dst, conflict);
1168                         free_inode_rec(rec);
1169                         free(ins);
1170                 } else {
1171                         BUG_ON(ret);
1172                 }
1173         }
1174
1175         if (src == &src_node->root_cache) {
1176                 src = &src_node->inode_cache;
1177                 dst = &dst_node->inode_cache;
1178                 goto again;
1179         }
1180
1181         if (current_ino > 0 && (!dst_node->current ||
1182             current_ino > dst_node->current->ino)) {
1183                 if (dst_node->current) {
1184                         dst_node->current->checked = 1;
1185                         maybe_free_inode_rec(dst, dst_node->current);
1186                 }
1187                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1188                 BUG_ON(IS_ERR(dst_node->current));
1189         }
1190         return 0;
1191 }
1192
1193 static void free_inode_ptr(struct cache_extent *cache)
1194 {
1195         struct ptr_node *node;
1196         struct inode_record *rec;
1197
1198         node = container_of(cache, struct ptr_node, cache);
1199         rec = node->data;
1200         free_inode_rec(rec);
1201         free(node);
1202 }
1203
1204 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1205
1206 static struct shared_node *find_shared_node(struct cache_tree *shared,
1207                                             u64 bytenr)
1208 {
1209         struct cache_extent *cache;
1210         struct shared_node *node;
1211
1212         cache = lookup_cache_extent(shared, bytenr, 1);
1213         if (cache) {
1214                 node = container_of(cache, struct shared_node, cache);
1215                 return node;
1216         }
1217         return NULL;
1218 }
1219
1220 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1221 {
1222         int ret;
1223         struct shared_node *node;
1224
1225         node = calloc(1, sizeof(*node));
1226         if (!node)
1227                 return -ENOMEM;
1228         node->cache.start = bytenr;
1229         node->cache.size = 1;
1230         cache_tree_init(&node->root_cache);
1231         cache_tree_init(&node->inode_cache);
1232         node->refs = refs;
1233
1234         ret = insert_cache_extent(shared, &node->cache);
1235
1236         return ret;
1237 }
1238
1239 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1240                              struct walk_control *wc, int level)
1241 {
1242         struct shared_node *node;
1243         struct shared_node *dest;
1244         int ret;
1245
1246         if (level == wc->active_node)
1247                 return 0;
1248
1249         BUG_ON(wc->active_node <= level);
1250         node = find_shared_node(&wc->shared, bytenr);
1251         if (!node) {
1252                 ret = add_shared_node(&wc->shared, bytenr, refs);
1253                 BUG_ON(ret);
1254                 node = find_shared_node(&wc->shared, bytenr);
1255                 wc->nodes[level] = node;
1256                 wc->active_node = level;
1257                 return 0;
1258         }
1259
1260         if (wc->root_level == wc->active_node &&
1261             btrfs_root_refs(&root->root_item) == 0) {
1262                 if (--node->refs == 0) {
1263                         free_inode_recs_tree(&node->root_cache);
1264                         free_inode_recs_tree(&node->inode_cache);
1265                         remove_cache_extent(&wc->shared, &node->cache);
1266                         free(node);
1267                 }
1268                 return 1;
1269         }
1270
1271         dest = wc->nodes[wc->active_node];
1272         splice_shared_node(node, dest);
1273         if (node->refs == 0) {
1274                 remove_cache_extent(&wc->shared, &node->cache);
1275                 free(node);
1276         }
1277         return 1;
1278 }
1279
1280 static int leave_shared_node(struct btrfs_root *root,
1281                              struct walk_control *wc, int level)
1282 {
1283         struct shared_node *node;
1284         struct shared_node *dest;
1285         int i;
1286
1287         if (level == wc->root_level)
1288                 return 0;
1289
1290         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1291                 if (wc->nodes[i])
1292                         break;
1293         }
1294         BUG_ON(i >= BTRFS_MAX_LEVEL);
1295
1296         node = wc->nodes[wc->active_node];
1297         wc->nodes[wc->active_node] = NULL;
1298         wc->active_node = i;
1299
1300         dest = wc->nodes[wc->active_node];
1301         if (wc->active_node < wc->root_level ||
1302             btrfs_root_refs(&root->root_item) > 0) {
1303                 BUG_ON(node->refs <= 1);
1304                 splice_shared_node(node, dest);
1305         } else {
1306                 BUG_ON(node->refs < 2);
1307                 node->refs--;
1308         }
1309         return 0;
1310 }
1311
1312 /*
1313  * Returns:
1314  * < 0 - on error
1315  * 1   - if the root with id child_root_id is a child of root parent_root_id
1316  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1317  *       has other root(s) as parent(s)
1318  * 2   - if the root child_root_id doesn't have any parent roots
1319  */
1320 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1321                          u64 child_root_id)
1322 {
1323         struct btrfs_path path;
1324         struct btrfs_key key;
1325         struct extent_buffer *leaf;
1326         int has_parent = 0;
1327         int ret;
1328
1329         btrfs_init_path(&path);
1330
1331         key.objectid = parent_root_id;
1332         key.type = BTRFS_ROOT_REF_KEY;
1333         key.offset = child_root_id;
1334         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1335                                 0, 0);
1336         if (ret < 0)
1337                 return ret;
1338         btrfs_release_path(&path);
1339         if (!ret)
1340                 return 1;
1341
1342         key.objectid = child_root_id;
1343         key.type = BTRFS_ROOT_BACKREF_KEY;
1344         key.offset = 0;
1345         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1346                                 0, 0);
1347         if (ret < 0)
1348                 goto out;
1349
1350         while (1) {
1351                 leaf = path.nodes[0];
1352                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1353                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1354                         if (ret)
1355                                 break;
1356                         leaf = path.nodes[0];
1357                 }
1358
1359                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1360                 if (key.objectid != child_root_id ||
1361                     key.type != BTRFS_ROOT_BACKREF_KEY)
1362                         break;
1363
1364                 has_parent = 1;
1365
1366                 if (key.offset == parent_root_id) {
1367                         btrfs_release_path(&path);
1368                         return 1;
1369                 }
1370
1371                 path.slots[0]++;
1372         }
1373 out:
1374         btrfs_release_path(&path);
1375         if (ret < 0)
1376                 return ret;
1377         return has_parent ? 0 : 2;
1378 }
1379
1380 static int process_dir_item(struct btrfs_root *root,
1381                             struct extent_buffer *eb,
1382                             int slot, struct btrfs_key *key,
1383                             struct shared_node *active_node)
1384 {
1385         u32 total;
1386         u32 cur = 0;
1387         u32 len;
1388         u32 name_len;
1389         u32 data_len;
1390         int error;
1391         int nritems = 0;
1392         int filetype;
1393         struct btrfs_dir_item *di;
1394         struct inode_record *rec;
1395         struct cache_tree *root_cache;
1396         struct cache_tree *inode_cache;
1397         struct btrfs_key location;
1398         char namebuf[BTRFS_NAME_LEN];
1399
1400         root_cache = &active_node->root_cache;
1401         inode_cache = &active_node->inode_cache;
1402         rec = active_node->current;
1403         rec->found_dir_item = 1;
1404
1405         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1406         total = btrfs_item_size_nr(eb, slot);
1407         while (cur < total) {
1408                 nritems++;
1409                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1410                 name_len = btrfs_dir_name_len(eb, di);
1411                 data_len = btrfs_dir_data_len(eb, di);
1412                 filetype = btrfs_dir_type(eb, di);
1413
1414                 rec->found_size += name_len;
1415                 if (name_len <= BTRFS_NAME_LEN) {
1416                         len = name_len;
1417                         error = 0;
1418                 } else {
1419                         len = BTRFS_NAME_LEN;
1420                         error = REF_ERR_NAME_TOO_LONG;
1421                 }
1422                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1423
1424                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1425                         add_inode_backref(inode_cache, location.objectid,
1426                                           key->objectid, key->offset, namebuf,
1427                                           len, filetype, key->type, error);
1428                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1429                         add_inode_backref(root_cache, location.objectid,
1430                                           key->objectid, key->offset,
1431                                           namebuf, len, filetype,
1432                                           key->type, error);
1433                 } else {
1434                         fprintf(stderr, "invalid location in dir item %u\n",
1435                                 location.type);
1436                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1437                                           key->objectid, key->offset, namebuf,
1438                                           len, filetype, key->type, error);
1439                 }
1440
1441                 len = sizeof(*di) + name_len + data_len;
1442                 di = (struct btrfs_dir_item *)((char *)di + len);
1443                 cur += len;
1444         }
1445         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1446                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1447
1448         return 0;
1449 }
1450
1451 static int process_inode_ref(struct extent_buffer *eb,
1452                              int slot, struct btrfs_key *key,
1453                              struct shared_node *active_node)
1454 {
1455         u32 total;
1456         u32 cur = 0;
1457         u32 len;
1458         u32 name_len;
1459         u64 index;
1460         int error;
1461         struct cache_tree *inode_cache;
1462         struct btrfs_inode_ref *ref;
1463         char namebuf[BTRFS_NAME_LEN];
1464
1465         inode_cache = &active_node->inode_cache;
1466
1467         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1468         total = btrfs_item_size_nr(eb, slot);
1469         while (cur < total) {
1470                 name_len = btrfs_inode_ref_name_len(eb, ref);
1471                 index = btrfs_inode_ref_index(eb, ref);
1472                 if (name_len <= BTRFS_NAME_LEN) {
1473                         len = name_len;
1474                         error = 0;
1475                 } else {
1476                         len = BTRFS_NAME_LEN;
1477                         error = REF_ERR_NAME_TOO_LONG;
1478                 }
1479                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1480                 add_inode_backref(inode_cache, key->objectid, key->offset,
1481                                   index, namebuf, len, 0, key->type, error);
1482
1483                 len = sizeof(*ref) + name_len;
1484                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1485                 cur += len;
1486         }
1487         return 0;
1488 }
1489
1490 static int process_inode_extref(struct extent_buffer *eb,
1491                                 int slot, struct btrfs_key *key,
1492                                 struct shared_node *active_node)
1493 {
1494         u32 total;
1495         u32 cur = 0;
1496         u32 len;
1497         u32 name_len;
1498         u64 index;
1499         u64 parent;
1500         int error;
1501         struct cache_tree *inode_cache;
1502         struct btrfs_inode_extref *extref;
1503         char namebuf[BTRFS_NAME_LEN];
1504
1505         inode_cache = &active_node->inode_cache;
1506
1507         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1508         total = btrfs_item_size_nr(eb, slot);
1509         while (cur < total) {
1510                 name_len = btrfs_inode_extref_name_len(eb, extref);
1511                 index = btrfs_inode_extref_index(eb, extref);
1512                 parent = btrfs_inode_extref_parent(eb, extref);
1513                 if (name_len <= BTRFS_NAME_LEN) {
1514                         len = name_len;
1515                         error = 0;
1516                 } else {
1517                         len = BTRFS_NAME_LEN;
1518                         error = REF_ERR_NAME_TOO_LONG;
1519                 }
1520                 read_extent_buffer(eb, namebuf,
1521                                    (unsigned long)(extref + 1), len);
1522                 add_inode_backref(inode_cache, key->objectid, parent,
1523                                   index, namebuf, len, 0, key->type, error);
1524
1525                 len = sizeof(*extref) + name_len;
1526                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1527                 cur += len;
1528         }
1529         return 0;
1530
1531 }
1532
1533 static int count_csum_range(struct btrfs_root *root, u64 start,
1534                             u64 len, u64 *found)
1535 {
1536         struct btrfs_key key;
1537         struct btrfs_path path;
1538         struct extent_buffer *leaf;
1539         int ret;
1540         size_t size;
1541         *found = 0;
1542         u64 csum_end;
1543         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1544
1545         btrfs_init_path(&path);
1546
1547         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1548         key.offset = start;
1549         key.type = BTRFS_EXTENT_CSUM_KEY;
1550
1551         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1552                                 &key, &path, 0, 0);
1553         if (ret < 0)
1554                 goto out;
1555         if (ret > 0 && path.slots[0] > 0) {
1556                 leaf = path.nodes[0];
1557                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1558                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1559                     key.type == BTRFS_EXTENT_CSUM_KEY)
1560                         path.slots[0]--;
1561         }
1562
1563         while (len > 0) {
1564                 leaf = path.nodes[0];
1565                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1566                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1567                         if (ret > 0)
1568                                 break;
1569                         else if (ret < 0)
1570                                 goto out;
1571                         leaf = path.nodes[0];
1572                 }
1573
1574                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1575                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1576                     key.type != BTRFS_EXTENT_CSUM_KEY)
1577                         break;
1578
1579                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1580                 if (key.offset >= start + len)
1581                         break;
1582
1583                 if (key.offset > start)
1584                         start = key.offset;
1585
1586                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1587                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1588                 if (csum_end > start) {
1589                         size = min(csum_end - start, len);
1590                         len -= size;
1591                         start += size;
1592                         *found += size;
1593                 }
1594
1595                 path.slots[0]++;
1596         }
1597 out:
1598         btrfs_release_path(&path);
1599         if (ret < 0)
1600                 return ret;
1601         return 0;
1602 }
1603
1604 static int process_file_extent(struct btrfs_root *root,
1605                                 struct extent_buffer *eb,
1606                                 int slot, struct btrfs_key *key,
1607                                 struct shared_node *active_node)
1608 {
1609         struct inode_record *rec;
1610         struct btrfs_file_extent_item *fi;
1611         u64 num_bytes = 0;
1612         u64 disk_bytenr = 0;
1613         u64 extent_offset = 0;
1614         u64 mask = root->sectorsize - 1;
1615         int extent_type;
1616         int ret;
1617
1618         rec = active_node->current;
1619         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1620         rec->found_file_extent = 1;
1621
1622         if (rec->extent_start == (u64)-1) {
1623                 rec->extent_start = key->offset;
1624                 rec->extent_end = key->offset;
1625         }
1626
1627         if (rec->extent_end > key->offset)
1628                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1629         else if (rec->extent_end < key->offset) {
1630                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1631                                            key->offset - rec->extent_end);
1632                 if (ret < 0)
1633                         return ret;
1634         }
1635
1636         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1637         extent_type = btrfs_file_extent_type(eb, fi);
1638
1639         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1640                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1641                 if (num_bytes == 0)
1642                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1643                 rec->found_size += num_bytes;
1644                 num_bytes = (num_bytes + mask) & ~mask;
1645         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1646                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1647                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1648                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1649                 extent_offset = btrfs_file_extent_offset(eb, fi);
1650                 if (num_bytes == 0 || (num_bytes & mask))
1651                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1652                 if (num_bytes + extent_offset >
1653                     btrfs_file_extent_ram_bytes(eb, fi))
1654                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1655                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1656                     (btrfs_file_extent_compression(eb, fi) ||
1657                      btrfs_file_extent_encryption(eb, fi) ||
1658                      btrfs_file_extent_other_encoding(eb, fi)))
1659                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1660                 if (disk_bytenr > 0)
1661                         rec->found_size += num_bytes;
1662         } else {
1663                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1664         }
1665         rec->extent_end = key->offset + num_bytes;
1666
1667         /*
1668          * The data reloc tree will copy full extents into its inode and then
1669          * copy the corresponding csums.  Because the extent it copied could be
1670          * a preallocated extent that hasn't been written to yet there may be no
1671          * csums to copy, ergo we won't have csums for our file extent.  This is
1672          * ok so just don't bother checking csums if the inode belongs to the
1673          * data reloc tree.
1674          */
1675         if (disk_bytenr > 0 &&
1676             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1677                 u64 found;
1678                 if (btrfs_file_extent_compression(eb, fi))
1679                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1680                 else
1681                         disk_bytenr += extent_offset;
1682
1683                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1684                 if (ret < 0)
1685                         return ret;
1686                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1687                         if (found > 0)
1688                                 rec->found_csum_item = 1;
1689                         if (found < num_bytes)
1690                                 rec->some_csum_missing = 1;
1691                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1692                         if (found > 0)
1693                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1694                 }
1695         }
1696         return 0;
1697 }
1698
1699 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1700                             struct walk_control *wc)
1701 {
1702         struct btrfs_key key;
1703         u32 nritems;
1704         int i;
1705         int ret = 0;
1706         struct cache_tree *inode_cache;
1707         struct shared_node *active_node;
1708
1709         if (wc->root_level == wc->active_node &&
1710             btrfs_root_refs(&root->root_item) == 0)
1711                 return 0;
1712
1713         active_node = wc->nodes[wc->active_node];
1714         inode_cache = &active_node->inode_cache;
1715         nritems = btrfs_header_nritems(eb);
1716         for (i = 0; i < nritems; i++) {
1717                 btrfs_item_key_to_cpu(eb, &key, i);
1718
1719                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1720                         continue;
1721                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1722                         continue;
1723
1724                 if (active_node->current == NULL ||
1725                     active_node->current->ino < key.objectid) {
1726                         if (active_node->current) {
1727                                 active_node->current->checked = 1;
1728                                 maybe_free_inode_rec(inode_cache,
1729                                                      active_node->current);
1730                         }
1731                         active_node->current = get_inode_rec(inode_cache,
1732                                                              key.objectid, 1);
1733                         BUG_ON(IS_ERR(active_node->current));
1734                 }
1735                 switch (key.type) {
1736                 case BTRFS_DIR_ITEM_KEY:
1737                 case BTRFS_DIR_INDEX_KEY:
1738                         ret = process_dir_item(root, eb, i, &key, active_node);
1739                         break;
1740                 case BTRFS_INODE_REF_KEY:
1741                         ret = process_inode_ref(eb, i, &key, active_node);
1742                         break;
1743                 case BTRFS_INODE_EXTREF_KEY:
1744                         ret = process_inode_extref(eb, i, &key, active_node);
1745                         break;
1746                 case BTRFS_INODE_ITEM_KEY:
1747                         ret = process_inode_item(eb, i, &key, active_node);
1748                         break;
1749                 case BTRFS_EXTENT_DATA_KEY:
1750                         ret = process_file_extent(root, eb, i, &key,
1751                                                   active_node);
1752                         break;
1753                 default:
1754                         break;
1755                 };
1756         }
1757         return ret;
1758 }
1759
1760 static void reada_walk_down(struct btrfs_root *root,
1761                             struct extent_buffer *node, int slot)
1762 {
1763         u64 bytenr;
1764         u64 ptr_gen;
1765         u32 nritems;
1766         u32 blocksize;
1767         int i;
1768         int level;
1769
1770         level = btrfs_header_level(node);
1771         if (level != 1)
1772                 return;
1773
1774         nritems = btrfs_header_nritems(node);
1775         blocksize = root->nodesize;
1776         for (i = slot; i < nritems; i++) {
1777                 bytenr = btrfs_node_blockptr(node, i);
1778                 ptr_gen = btrfs_node_ptr_generation(node, i);
1779                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1780         }
1781 }
1782
1783 /*
1784  * Check the child node/leaf by the following condition:
1785  * 1. the first item key of the node/leaf should be the same with the one
1786  *    in parent.
1787  * 2. block in parent node should match the child node/leaf.
1788  * 3. generation of parent node and child's header should be consistent.
1789  *
1790  * Or the child node/leaf pointed by the key in parent is not valid.
1791  *
1792  * We hope to check leaf owner too, but since subvol may share leaves,
1793  * which makes leaf owner check not so strong, key check should be
1794  * sufficient enough for that case.
1795  */
1796 static int check_child_node(struct btrfs_root *root,
1797                             struct extent_buffer *parent, int slot,
1798                             struct extent_buffer *child)
1799 {
1800         struct btrfs_key parent_key;
1801         struct btrfs_key child_key;
1802         int ret = 0;
1803
1804         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1805         if (btrfs_header_level(child) == 0)
1806                 btrfs_item_key_to_cpu(child, &child_key, 0);
1807         else
1808                 btrfs_node_key_to_cpu(child, &child_key, 0);
1809
1810         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1811                 ret = -EINVAL;
1812                 fprintf(stderr,
1813                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1814                         parent_key.objectid, parent_key.type, parent_key.offset,
1815                         child_key.objectid, child_key.type, child_key.offset);
1816         }
1817         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1818                 ret = -EINVAL;
1819                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1820                         btrfs_node_blockptr(parent, slot),
1821                         btrfs_header_bytenr(child));
1822         }
1823         if (btrfs_node_ptr_generation(parent, slot) !=
1824             btrfs_header_generation(child)) {
1825                 ret = -EINVAL;
1826                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1827                         btrfs_header_generation(child),
1828                         btrfs_node_ptr_generation(parent, slot));
1829         }
1830         return ret;
1831 }
1832
1833 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1834                           struct walk_control *wc, int *level)
1835 {
1836         enum btrfs_tree_block_status status;
1837         u64 bytenr;
1838         u64 ptr_gen;
1839         struct extent_buffer *next;
1840         struct extent_buffer *cur;
1841         u32 blocksize;
1842         int ret, err = 0;
1843         u64 refs;
1844
1845         WARN_ON(*level < 0);
1846         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1847         ret = btrfs_lookup_extent_info(NULL, root,
1848                                        path->nodes[*level]->start,
1849                                        *level, 1, &refs, NULL);
1850         if (ret < 0) {
1851                 err = ret;
1852                 goto out;
1853         }
1854
1855         if (refs > 1) {
1856                 ret = enter_shared_node(root, path->nodes[*level]->start,
1857                                         refs, wc, *level);
1858                 if (ret > 0) {
1859                         err = ret;
1860                         goto out;
1861                 }
1862         }
1863
1864         while (*level >= 0) {
1865                 WARN_ON(*level < 0);
1866                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1867                 cur = path->nodes[*level];
1868
1869                 if (btrfs_header_level(cur) != *level)
1870                         WARN_ON(1);
1871
1872                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1873                         break;
1874                 if (*level == 0) {
1875                         ret = process_one_leaf(root, cur, wc);
1876                         if (ret < 0)
1877                                 err = ret;
1878                         break;
1879                 }
1880                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1881                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1882                 blocksize = root->nodesize;
1883                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1884                                                1, &refs, NULL);
1885                 if (ret < 0)
1886                         refs = 0;
1887
1888                 if (refs > 1) {
1889                         ret = enter_shared_node(root, bytenr, refs,
1890                                                 wc, *level - 1);
1891                         if (ret > 0) {
1892                                 path->slots[*level]++;
1893                                 continue;
1894                         }
1895                 }
1896
1897                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1898                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1899                         free_extent_buffer(next);
1900                         reada_walk_down(root, cur, path->slots[*level]);
1901                         next = read_tree_block(root, bytenr, blocksize,
1902                                                ptr_gen);
1903                         if (!extent_buffer_uptodate(next)) {
1904                                 struct btrfs_key node_key;
1905
1906                                 btrfs_node_key_to_cpu(path->nodes[*level],
1907                                                       &node_key,
1908                                                       path->slots[*level]);
1909                                 btrfs_add_corrupt_extent_record(root->fs_info,
1910                                                 &node_key,
1911                                                 path->nodes[*level]->start,
1912                                                 root->nodesize, *level);
1913                                 err = -EIO;
1914                                 goto out;
1915                         }
1916                 }
1917
1918                 ret = check_child_node(root, cur, path->slots[*level], next);
1919                 if (ret) {
1920                         err = ret;
1921                         goto out;
1922                 }
1923
1924                 if (btrfs_is_leaf(next))
1925                         status = btrfs_check_leaf(root, NULL, next);
1926                 else
1927                         status = btrfs_check_node(root, NULL, next);
1928                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1929                         free_extent_buffer(next);
1930                         err = -EIO;
1931                         goto out;
1932                 }
1933
1934                 *level = *level - 1;
1935                 free_extent_buffer(path->nodes[*level]);
1936                 path->nodes[*level] = next;
1937                 path->slots[*level] = 0;
1938         }
1939 out:
1940         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1941         return err;
1942 }
1943
1944 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1945                         struct walk_control *wc, int *level)
1946 {
1947         int i;
1948         struct extent_buffer *leaf;
1949
1950         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1951                 leaf = path->nodes[i];
1952                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1953                         path->slots[i]++;
1954                         *level = i;
1955                         return 0;
1956                 } else {
1957                         free_extent_buffer(path->nodes[*level]);
1958                         path->nodes[*level] = NULL;
1959                         BUG_ON(*level > wc->active_node);
1960                         if (*level == wc->active_node)
1961                                 leave_shared_node(root, wc, *level);
1962                         *level = i + 1;
1963                 }
1964         }
1965         return 1;
1966 }
1967
1968 static int check_root_dir(struct inode_record *rec)
1969 {
1970         struct inode_backref *backref;
1971         int ret = -1;
1972
1973         if (!rec->found_inode_item || rec->errors)
1974                 goto out;
1975         if (rec->nlink != 1 || rec->found_link != 0)
1976                 goto out;
1977         if (list_empty(&rec->backrefs))
1978                 goto out;
1979         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1980         if (!backref->found_inode_ref)
1981                 goto out;
1982         if (backref->index != 0 || backref->namelen != 2 ||
1983             memcmp(backref->name, "..", 2))
1984                 goto out;
1985         if (backref->found_dir_index || backref->found_dir_item)
1986                 goto out;
1987         ret = 0;
1988 out:
1989         return ret;
1990 }
1991
1992 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1993                               struct btrfs_root *root, struct btrfs_path *path,
1994                               struct inode_record *rec)
1995 {
1996         struct btrfs_inode_item *ei;
1997         struct btrfs_key key;
1998         int ret;
1999
2000         key.objectid = rec->ino;
2001         key.type = BTRFS_INODE_ITEM_KEY;
2002         key.offset = (u64)-1;
2003
2004         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2005         if (ret < 0)
2006                 goto out;
2007         if (ret) {
2008                 if (!path->slots[0]) {
2009                         ret = -ENOENT;
2010                         goto out;
2011                 }
2012                 path->slots[0]--;
2013                 ret = 0;
2014         }
2015         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2016         if (key.objectid != rec->ino) {
2017                 ret = -ENOENT;
2018                 goto out;
2019         }
2020
2021         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2022                             struct btrfs_inode_item);
2023         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2024         btrfs_mark_buffer_dirty(path->nodes[0]);
2025         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2026         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2027                root->root_key.objectid);
2028 out:
2029         btrfs_release_path(path);
2030         return ret;
2031 }
2032
2033 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2034                                     struct btrfs_root *root,
2035                                     struct btrfs_path *path,
2036                                     struct inode_record *rec)
2037 {
2038         int ret;
2039
2040         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2041         btrfs_release_path(path);
2042         if (!ret)
2043                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2044         return ret;
2045 }
2046
2047 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2048                                struct btrfs_root *root,
2049                                struct btrfs_path *path,
2050                                struct inode_record *rec)
2051 {
2052         struct btrfs_inode_item *ei;
2053         struct btrfs_key key;
2054         int ret = 0;
2055
2056         key.objectid = rec->ino;
2057         key.type = BTRFS_INODE_ITEM_KEY;
2058         key.offset = 0;
2059
2060         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2061         if (ret) {
2062                 if (ret > 0)
2063                         ret = -ENOENT;
2064                 goto out;
2065         }
2066
2067         /* Since ret == 0, no need to check anything */
2068         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2069                             struct btrfs_inode_item);
2070         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2071         btrfs_mark_buffer_dirty(path->nodes[0]);
2072         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2073         printf("reset nbytes for ino %llu root %llu\n",
2074                rec->ino, root->root_key.objectid);
2075 out:
2076         btrfs_release_path(path);
2077         return ret;
2078 }
2079
2080 static int add_missing_dir_index(struct btrfs_root *root,
2081                                  struct cache_tree *inode_cache,
2082                                  struct inode_record *rec,
2083                                  struct inode_backref *backref)
2084 {
2085         struct btrfs_path *path;
2086         struct btrfs_trans_handle *trans;
2087         struct btrfs_dir_item *dir_item;
2088         struct extent_buffer *leaf;
2089         struct btrfs_key key;
2090         struct btrfs_disk_key disk_key;
2091         struct inode_record *dir_rec;
2092         unsigned long name_ptr;
2093         u32 data_size = sizeof(*dir_item) + backref->namelen;
2094         int ret;
2095
2096         path = btrfs_alloc_path();
2097         if (!path)
2098                 return -ENOMEM;
2099
2100         trans = btrfs_start_transaction(root, 1);
2101         if (IS_ERR(trans)) {
2102                 btrfs_free_path(path);
2103                 return PTR_ERR(trans);
2104         }
2105
2106         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2107                 (unsigned long long)rec->ino);
2108         key.objectid = backref->dir;
2109         key.type = BTRFS_DIR_INDEX_KEY;
2110         key.offset = backref->index;
2111
2112         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2113         BUG_ON(ret);
2114
2115         leaf = path->nodes[0];
2116         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2117
2118         disk_key.objectid = cpu_to_le64(rec->ino);
2119         disk_key.type = BTRFS_INODE_ITEM_KEY;
2120         disk_key.offset = 0;
2121
2122         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2123         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2124         btrfs_set_dir_data_len(leaf, dir_item, 0);
2125         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2126         name_ptr = (unsigned long)(dir_item + 1);
2127         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2128         btrfs_mark_buffer_dirty(leaf);
2129         btrfs_free_path(path);
2130         btrfs_commit_transaction(trans, root);
2131
2132         backref->found_dir_index = 1;
2133         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2134         BUG_ON(IS_ERR(dir_rec));
2135         if (!dir_rec)
2136                 return 0;
2137         dir_rec->found_size += backref->namelen;
2138         if (dir_rec->found_size == dir_rec->isize &&
2139             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2140                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2141         if (dir_rec->found_size != dir_rec->isize)
2142                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2143
2144         return 0;
2145 }
2146
2147 static int delete_dir_index(struct btrfs_root *root,
2148                             struct cache_tree *inode_cache,
2149                             struct inode_record *rec,
2150                             struct inode_backref *backref)
2151 {
2152         struct btrfs_trans_handle *trans;
2153         struct btrfs_dir_item *di;
2154         struct btrfs_path *path;
2155         int ret = 0;
2156
2157         path = btrfs_alloc_path();
2158         if (!path)
2159                 return -ENOMEM;
2160
2161         trans = btrfs_start_transaction(root, 1);
2162         if (IS_ERR(trans)) {
2163                 btrfs_free_path(path);
2164                 return PTR_ERR(trans);
2165         }
2166
2167
2168         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2169                 (unsigned long long)backref->dir,
2170                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2171                 (unsigned long long)root->objectid);
2172
2173         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2174                                     backref->name, backref->namelen,
2175                                     backref->index, -1);
2176         if (IS_ERR(di)) {
2177                 ret = PTR_ERR(di);
2178                 btrfs_free_path(path);
2179                 btrfs_commit_transaction(trans, root);
2180                 if (ret == -ENOENT)
2181                         return 0;
2182                 return ret;
2183         }
2184
2185         if (!di)
2186                 ret = btrfs_del_item(trans, root, path);
2187         else
2188                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2189         BUG_ON(ret);
2190         btrfs_free_path(path);
2191         btrfs_commit_transaction(trans, root);
2192         return ret;
2193 }
2194
2195 static int create_inode_item(struct btrfs_root *root,
2196                              struct inode_record *rec,
2197                              struct inode_backref *backref, int root_dir)
2198 {
2199         struct btrfs_trans_handle *trans;
2200         struct btrfs_inode_item inode_item;
2201         time_t now = time(NULL);
2202         int ret;
2203
2204         trans = btrfs_start_transaction(root, 1);
2205         if (IS_ERR(trans)) {
2206                 ret = PTR_ERR(trans);
2207                 return ret;
2208         }
2209
2210         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2211                 "be incomplete, please check permissions and content after "
2212                 "the fsck completes.\n", (unsigned long long)root->objectid,
2213                 (unsigned long long)rec->ino);
2214
2215         memset(&inode_item, 0, sizeof(inode_item));
2216         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2217         if (root_dir)
2218                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2219         else
2220                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2221         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2222         if (rec->found_dir_item) {
2223                 if (rec->found_file_extent)
2224                         fprintf(stderr, "root %llu inode %llu has both a dir "
2225                                 "item and extents, unsure if it is a dir or a "
2226                                 "regular file so setting it as a directory\n",
2227                                 (unsigned long long)root->objectid,
2228                                 (unsigned long long)rec->ino);
2229                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2230                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2231         } else if (!rec->found_dir_item) {
2232                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2233                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2234         }
2235         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2236         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2237         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2238         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2239         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2240         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2241         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2242         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2243
2244         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2245         BUG_ON(ret);
2246         btrfs_commit_transaction(trans, root);
2247         return 0;
2248 }
2249
2250 static int repair_inode_backrefs(struct btrfs_root *root,
2251                                  struct inode_record *rec,
2252                                  struct cache_tree *inode_cache,
2253                                  int delete)
2254 {
2255         struct inode_backref *tmp, *backref;
2256         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2257         int ret = 0;
2258         int repaired = 0;
2259
2260         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2261                 if (!delete && rec->ino == root_dirid) {
2262                         if (!rec->found_inode_item) {
2263                                 ret = create_inode_item(root, rec, backref, 1);
2264                                 if (ret)
2265                                         break;
2266                                 repaired++;
2267                         }
2268                 }
2269
2270                 /* Index 0 for root dir's are special, don't mess with it */
2271                 if (rec->ino == root_dirid && backref->index == 0)
2272                         continue;
2273
2274                 if (delete &&
2275                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2276                      (backref->found_dir_index && backref->found_inode_ref &&
2277                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2278                         ret = delete_dir_index(root, inode_cache, rec, backref);
2279                         if (ret)
2280                                 break;
2281                         repaired++;
2282                         list_del(&backref->list);
2283                         free(backref);
2284                 }
2285
2286                 if (!delete && !backref->found_dir_index &&
2287                     backref->found_dir_item && backref->found_inode_ref) {
2288                         ret = add_missing_dir_index(root, inode_cache, rec,
2289                                                     backref);
2290                         if (ret)
2291                                 break;
2292                         repaired++;
2293                         if (backref->found_dir_item &&
2294                             backref->found_dir_index &&
2295                             backref->found_dir_index) {
2296                                 if (!backref->errors &&
2297                                     backref->found_inode_ref) {
2298                                         list_del(&backref->list);
2299                                         free(backref);
2300                                 }
2301                         }
2302                 }
2303
2304                 if (!delete && (!backref->found_dir_index &&
2305                                 !backref->found_dir_item &&
2306                                 backref->found_inode_ref)) {
2307                         struct btrfs_trans_handle *trans;
2308                         struct btrfs_key location;
2309
2310                         ret = check_dir_conflict(root, backref->name,
2311                                                  backref->namelen,
2312                                                  backref->dir,
2313                                                  backref->index);
2314                         if (ret) {
2315                                 /*
2316                                  * let nlink fixing routine to handle it,
2317                                  * which can do it better.
2318                                  */
2319                                 ret = 0;
2320                                 break;
2321                         }
2322                         location.objectid = rec->ino;
2323                         location.type = BTRFS_INODE_ITEM_KEY;
2324                         location.offset = 0;
2325
2326                         trans = btrfs_start_transaction(root, 1);
2327                         if (IS_ERR(trans)) {
2328                                 ret = PTR_ERR(trans);
2329                                 break;
2330                         }
2331                         fprintf(stderr, "adding missing dir index/item pair "
2332                                 "for inode %llu\n",
2333                                 (unsigned long long)rec->ino);
2334                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2335                                                     backref->namelen,
2336                                                     backref->dir, &location,
2337                                                     imode_to_type(rec->imode),
2338                                                     backref->index);
2339                         BUG_ON(ret);
2340                         btrfs_commit_transaction(trans, root);
2341                         repaired++;
2342                 }
2343
2344                 if (!delete && (backref->found_inode_ref &&
2345                                 backref->found_dir_index &&
2346                                 backref->found_dir_item &&
2347                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2348                                 !rec->found_inode_item)) {
2349                         ret = create_inode_item(root, rec, backref, 0);
2350                         if (ret)
2351                                 break;
2352                         repaired++;
2353                 }
2354
2355         }
2356         return ret ? ret : repaired;
2357 }
2358
2359 /*
2360  * To determine the file type for nlink/inode_item repair
2361  *
2362  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2363  * Return -ENOENT if file type is not found.
2364  */
2365 static int find_file_type(struct inode_record *rec, u8 *type)
2366 {
2367         struct inode_backref *backref;
2368
2369         /* For inode item recovered case */
2370         if (rec->found_inode_item) {
2371                 *type = imode_to_type(rec->imode);
2372                 return 0;
2373         }
2374
2375         list_for_each_entry(backref, &rec->backrefs, list) {
2376                 if (backref->found_dir_index || backref->found_dir_item) {
2377                         *type = backref->filetype;
2378                         return 0;
2379                 }
2380         }
2381         return -ENOENT;
2382 }
2383
2384 /*
2385  * To determine the file name for nlink repair
2386  *
2387  * Return 0 if file name is found, set name and namelen.
2388  * Return -ENOENT if file name is not found.
2389  */
2390 static int find_file_name(struct inode_record *rec,
2391                           char *name, int *namelen)
2392 {
2393         struct inode_backref *backref;
2394
2395         list_for_each_entry(backref, &rec->backrefs, list) {
2396                 if (backref->found_dir_index || backref->found_dir_item ||
2397                     backref->found_inode_ref) {
2398                         memcpy(name, backref->name, backref->namelen);
2399                         *namelen = backref->namelen;
2400                         return 0;
2401                 }
2402         }
2403         return -ENOENT;
2404 }
2405
2406 /* Reset the nlink of the inode to the correct one */
2407 static int reset_nlink(struct btrfs_trans_handle *trans,
2408                        struct btrfs_root *root,
2409                        struct btrfs_path *path,
2410                        struct inode_record *rec)
2411 {
2412         struct inode_backref *backref;
2413         struct inode_backref *tmp;
2414         struct btrfs_key key;
2415         struct btrfs_inode_item *inode_item;
2416         int ret = 0;
2417
2418         /* We don't believe this either, reset it and iterate backref */
2419         rec->found_link = 0;
2420
2421         /* Remove all backref including the valid ones */
2422         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2423                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2424                                    backref->index, backref->name,
2425                                    backref->namelen, 0);
2426                 if (ret < 0)
2427                         goto out;
2428
2429                 /* remove invalid backref, so it won't be added back */
2430                 if (!(backref->found_dir_index &&
2431                       backref->found_dir_item &&
2432                       backref->found_inode_ref)) {
2433                         list_del(&backref->list);
2434                         free(backref);
2435                 } else {
2436                         rec->found_link++;
2437                 }
2438         }
2439
2440         /* Set nlink to 0 */
2441         key.objectid = rec->ino;
2442         key.type = BTRFS_INODE_ITEM_KEY;
2443         key.offset = 0;
2444         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2445         if (ret < 0)
2446                 goto out;
2447         if (ret > 0) {
2448                 ret = -ENOENT;
2449                 goto out;
2450         }
2451         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2452                                     struct btrfs_inode_item);
2453         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2454         btrfs_mark_buffer_dirty(path->nodes[0]);
2455         btrfs_release_path(path);
2456
2457         /*
2458          * Add back valid inode_ref/dir_item/dir_index,
2459          * add_link() will handle the nlink inc, so new nlink must be correct
2460          */
2461         list_for_each_entry(backref, &rec->backrefs, list) {
2462                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2463                                      backref->name, backref->namelen,
2464                                      backref->filetype, &backref->index, 1);
2465                 if (ret < 0)
2466                         goto out;
2467         }
2468 out:
2469         btrfs_release_path(path);
2470         return ret;
2471 }
2472
2473 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2474                                struct btrfs_root *root,
2475                                struct btrfs_path *path,
2476                                struct inode_record *rec)
2477 {
2478         char *dir_name = "lost+found";
2479         char namebuf[BTRFS_NAME_LEN] = {0};
2480         u64 lost_found_ino;
2481         u32 mode = 0700;
2482         u8 type = 0;
2483         int namelen = 0;
2484         int name_recovered = 0;
2485         int type_recovered = 0;
2486         int ret = 0;
2487
2488         /*
2489          * Get file name and type first before these invalid inode ref
2490          * are deleted by remove_all_invalid_backref()
2491          */
2492         name_recovered = !find_file_name(rec, namebuf, &namelen);
2493         type_recovered = !find_file_type(rec, &type);
2494
2495         if (!name_recovered) {
2496                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2497                        rec->ino, rec->ino);
2498                 namelen = count_digits(rec->ino);
2499                 sprintf(namebuf, "%llu", rec->ino);
2500                 name_recovered = 1;
2501         }
2502         if (!type_recovered) {
2503                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2504                        rec->ino);
2505                 type = BTRFS_FT_REG_FILE;
2506                 type_recovered = 1;
2507         }
2508
2509         ret = reset_nlink(trans, root, path, rec);
2510         if (ret < 0) {
2511                 fprintf(stderr,
2512                         "Failed to reset nlink for inode %llu: %s\n",
2513                         rec->ino, strerror(-ret));
2514                 goto out;
2515         }
2516
2517         if (rec->found_link == 0) {
2518                 lost_found_ino = root->highest_inode;
2519                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2520                         ret = -EOVERFLOW;
2521                         goto out;
2522                 }
2523                 lost_found_ino++;
2524                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2525                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2526                                   mode);
2527                 if (ret < 0) {
2528                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2529                                 dir_name, strerror(-ret));
2530                         goto out;
2531                 }
2532                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2533                                      namebuf, namelen, type, NULL, 1);
2534                 /*
2535                  * Add ".INO" suffix several times to handle case where
2536                  * "FILENAME.INO" is already taken by another file.
2537                  */
2538                 while (ret == -EEXIST) {
2539                         /*
2540                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2541                          */
2542                         if (namelen + count_digits(rec->ino) + 1 >
2543                             BTRFS_NAME_LEN) {
2544                                 ret = -EFBIG;
2545                                 goto out;
2546                         }
2547                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2548                                  ".%llu", rec->ino);
2549                         namelen += count_digits(rec->ino) + 1;
2550                         ret = btrfs_add_link(trans, root, rec->ino,
2551                                              lost_found_ino, namebuf,
2552                                              namelen, type, NULL, 1);
2553                 }
2554                 if (ret < 0) {
2555                         fprintf(stderr,
2556                                 "Failed to link the inode %llu to %s dir: %s\n",
2557                                 rec->ino, dir_name, strerror(-ret));
2558                         goto out;
2559                 }
2560                 /*
2561                  * Just increase the found_link, don't actually add the
2562                  * backref. This will make things easier and this inode
2563                  * record will be freed after the repair is done.
2564                  * So fsck will not report problem about this inode.
2565                  */
2566                 rec->found_link++;
2567                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2568                        namelen, namebuf, dir_name);
2569         }
2570         printf("Fixed the nlink of inode %llu\n", rec->ino);
2571 out:
2572         /*
2573          * Clear the flag anyway, or we will loop forever for the same inode
2574          * as it will not be removed from the bad inode list and the dead loop
2575          * happens.
2576          */
2577         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2578         btrfs_release_path(path);
2579         return ret;
2580 }
2581
2582 /*
2583  * Check if there is any normal(reg or prealloc) file extent for given
2584  * ino.
2585  * This is used to determine the file type when neither its dir_index/item or
2586  * inode_item exists.
2587  *
2588  * This will *NOT* report error, if any error happens, just consider it does
2589  * not have any normal file extent.
2590  */
2591 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2592 {
2593         struct btrfs_path *path;
2594         struct btrfs_key key;
2595         struct btrfs_key found_key;
2596         struct btrfs_file_extent_item *fi;
2597         u8 type;
2598         int ret = 0;
2599
2600         path = btrfs_alloc_path();
2601         if (!path)
2602                 goto out;
2603         key.objectid = ino;
2604         key.type = BTRFS_EXTENT_DATA_KEY;
2605         key.offset = 0;
2606
2607         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2608         if (ret < 0) {
2609                 ret = 0;
2610                 goto out;
2611         }
2612         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2613                 ret = btrfs_next_leaf(root, path);
2614                 if (ret) {
2615                         ret = 0;
2616                         goto out;
2617                 }
2618         }
2619         while (1) {
2620                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2621                                       path->slots[0]);
2622                 if (found_key.objectid != ino ||
2623                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2624                         break;
2625                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2626                                     struct btrfs_file_extent_item);
2627                 type = btrfs_file_extent_type(path->nodes[0], fi);
2628                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2629                         ret = 1;
2630                         goto out;
2631                 }
2632         }
2633 out:
2634         btrfs_free_path(path);
2635         return ret;
2636 }
2637
2638 static u32 btrfs_type_to_imode(u8 type)
2639 {
2640         static u32 imode_by_btrfs_type[] = {
2641                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2642                 [BTRFS_FT_DIR]          = S_IFDIR,
2643                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2644                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2645                 [BTRFS_FT_FIFO]         = S_IFIFO,
2646                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2647                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2648         };
2649
2650         return imode_by_btrfs_type[(type)];
2651 }
2652
2653 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2654                                 struct btrfs_root *root,
2655                                 struct btrfs_path *path,
2656                                 struct inode_record *rec)
2657 {
2658         u8 filetype;
2659         u32 mode = 0700;
2660         int type_recovered = 0;
2661         int ret = 0;
2662
2663         printf("Trying to rebuild inode:%llu\n", rec->ino);
2664
2665         type_recovered = !find_file_type(rec, &filetype);
2666
2667         /*
2668          * Try to determine inode type if type not found.
2669          *
2670          * For found regular file extent, it must be FILE.
2671          * For found dir_item/index, it must be DIR.
2672          *
2673          * For undetermined one, use FILE as fallback.
2674          *
2675          * TODO:
2676          * 1. If found backref(inode_index/item is already handled) to it,
2677          *    it must be DIR.
2678          *    Need new inode-inode ref structure to allow search for that.
2679          */
2680         if (!type_recovered) {
2681                 if (rec->found_file_extent &&
2682                     find_normal_file_extent(root, rec->ino)) {
2683                         type_recovered = 1;
2684                         filetype = BTRFS_FT_REG_FILE;
2685                 } else if (rec->found_dir_item) {
2686                         type_recovered = 1;
2687                         filetype = BTRFS_FT_DIR;
2688                 } else if (!list_empty(&rec->orphan_extents)) {
2689                         type_recovered = 1;
2690                         filetype = BTRFS_FT_REG_FILE;
2691                 } else{
2692                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2693                                rec->ino);
2694                         type_recovered = 1;
2695                         filetype = BTRFS_FT_REG_FILE;
2696                 }
2697         }
2698
2699         ret = btrfs_new_inode(trans, root, rec->ino,
2700                               mode | btrfs_type_to_imode(filetype));
2701         if (ret < 0)
2702                 goto out;
2703
2704         /*
2705          * Here inode rebuild is done, we only rebuild the inode item,
2706          * don't repair the nlink(like move to lost+found).
2707          * That is the job of nlink repair.
2708          *
2709          * We just fill the record and return
2710          */
2711         rec->found_dir_item = 1;
2712         rec->imode = mode | btrfs_type_to_imode(filetype);
2713         rec->nlink = 0;
2714         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2715         /* Ensure the inode_nlinks repair function will be called */
2716         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2717 out:
2718         return ret;
2719 }
2720
2721 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2722                                       struct btrfs_root *root,
2723                                       struct btrfs_path *path,
2724                                       struct inode_record *rec)
2725 {
2726         struct orphan_data_extent *orphan;
2727         struct orphan_data_extent *tmp;
2728         int ret = 0;
2729
2730         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2731                 /*
2732                  * Check for conflicting file extents
2733                  *
2734                  * Here we don't know whether the extents is compressed or not,
2735                  * so we can only assume it not compressed nor data offset,
2736                  * and use its disk_len as extent length.
2737                  */
2738                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2739                                        orphan->offset, orphan->disk_len, 0);
2740                 btrfs_release_path(path);
2741                 if (ret < 0)
2742                         goto out;
2743                 if (!ret) {
2744                         fprintf(stderr,
2745                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2746                                 orphan->disk_bytenr, orphan->disk_len);
2747                         ret = btrfs_free_extent(trans,
2748                                         root->fs_info->extent_root,
2749                                         orphan->disk_bytenr, orphan->disk_len,
2750                                         0, root->objectid, orphan->objectid,
2751                                         orphan->offset);
2752                         if (ret < 0)
2753                                 goto out;
2754                 }
2755                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2756                                 orphan->offset, orphan->disk_bytenr,
2757                                 orphan->disk_len, orphan->disk_len);
2758                 if (ret < 0)
2759                         goto out;
2760
2761                 /* Update file size info */
2762                 rec->found_size += orphan->disk_len;
2763                 if (rec->found_size == rec->nbytes)
2764                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2765
2766                 /* Update the file extent hole info too */
2767                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2768                                            orphan->disk_len);
2769                 if (ret < 0)
2770                         goto out;
2771                 if (RB_EMPTY_ROOT(&rec->holes))
2772                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2773
2774                 list_del(&orphan->list);
2775                 free(orphan);
2776         }
2777         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2778 out:
2779         return ret;
2780 }
2781
2782 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2783                                         struct btrfs_root *root,
2784                                         struct btrfs_path *path,
2785                                         struct inode_record *rec)
2786 {
2787         struct rb_node *node;
2788         struct file_extent_hole *hole;
2789         int found = 0;
2790         int ret = 0;
2791
2792         node = rb_first(&rec->holes);
2793
2794         while (node) {
2795                 found = 1;
2796                 hole = rb_entry(node, struct file_extent_hole, node);
2797                 ret = btrfs_punch_hole(trans, root, rec->ino,
2798                                        hole->start, hole->len);
2799                 if (ret < 0)
2800                         goto out;
2801                 ret = del_file_extent_hole(&rec->holes, hole->start,
2802                                            hole->len);
2803                 if (ret < 0)
2804                         goto out;
2805                 if (RB_EMPTY_ROOT(&rec->holes))
2806                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2807                 node = rb_first(&rec->holes);
2808         }
2809         /* special case for a file losing all its file extent */
2810         if (!found) {
2811                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2812                                        round_up(rec->isize, root->sectorsize));
2813                 if (ret < 0)
2814                         goto out;
2815         }
2816         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2817                rec->ino, root->objectid);
2818 out:
2819         return ret;
2820 }
2821
2822 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2823 {
2824         struct btrfs_trans_handle *trans;
2825         struct btrfs_path *path;
2826         int ret = 0;
2827
2828         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2829                              I_ERR_NO_ORPHAN_ITEM |
2830                              I_ERR_LINK_COUNT_WRONG |
2831                              I_ERR_NO_INODE_ITEM |
2832                              I_ERR_FILE_EXTENT_ORPHAN |
2833                              I_ERR_FILE_EXTENT_DISCOUNT|
2834                              I_ERR_FILE_NBYTES_WRONG)))
2835                 return rec->errors;
2836
2837         path = btrfs_alloc_path();
2838         if (!path)
2839                 return -ENOMEM;
2840
2841         /*
2842          * For nlink repair, it may create a dir and add link, so
2843          * 2 for parent(256)'s dir_index and dir_item
2844          * 2 for lost+found dir's inode_item and inode_ref
2845          * 1 for the new inode_ref of the file
2846          * 2 for lost+found dir's dir_index and dir_item for the file
2847          */
2848         trans = btrfs_start_transaction(root, 7);
2849         if (IS_ERR(trans)) {
2850                 btrfs_free_path(path);
2851                 return PTR_ERR(trans);
2852         }
2853
2854         if (rec->errors & I_ERR_NO_INODE_ITEM)
2855                 ret = repair_inode_no_item(trans, root, path, rec);
2856         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2857                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2858         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2859                 ret = repair_inode_discount_extent(trans, root, path, rec);
2860         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2861                 ret = repair_inode_isize(trans, root, path, rec);
2862         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2863                 ret = repair_inode_orphan_item(trans, root, path, rec);
2864         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2865                 ret = repair_inode_nlinks(trans, root, path, rec);
2866         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2867                 ret = repair_inode_nbytes(trans, root, path, rec);
2868         btrfs_commit_transaction(trans, root);
2869         btrfs_free_path(path);
2870         return ret;
2871 }
2872
2873 static int check_inode_recs(struct btrfs_root *root,
2874                             struct cache_tree *inode_cache)
2875 {
2876         struct cache_extent *cache;
2877         struct ptr_node *node;
2878         struct inode_record *rec;
2879         struct inode_backref *backref;
2880         int stage = 0;
2881         int ret = 0;
2882         int err = 0;
2883         u64 error = 0;
2884         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2885
2886         if (btrfs_root_refs(&root->root_item) == 0) {
2887                 if (!cache_tree_empty(inode_cache))
2888                         fprintf(stderr, "warning line %d\n", __LINE__);
2889                 return 0;
2890         }
2891
2892         /*
2893          * We need to record the highest inode number for later 'lost+found'
2894          * dir creation.
2895          * We must select a ino not used/refered by any existing inode, or
2896          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2897          * this may cause 'lost+found' dir has wrong nlinks.
2898          */
2899         cache = last_cache_extent(inode_cache);
2900         if (cache) {
2901                 node = container_of(cache, struct ptr_node, cache);
2902                 rec = node->data;
2903                 if (rec->ino > root->highest_inode)
2904                         root->highest_inode = rec->ino;
2905         }
2906
2907         /*
2908          * We need to repair backrefs first because we could change some of the
2909          * errors in the inode recs.
2910          *
2911          * We also need to go through and delete invalid backrefs first and then
2912          * add the correct ones second.  We do this because we may get EEXIST
2913          * when adding back the correct index because we hadn't yet deleted the
2914          * invalid index.
2915          *
2916          * For example, if we were missing a dir index then the directories
2917          * isize would be wrong, so if we fixed the isize to what we thought it
2918          * would be and then fixed the backref we'd still have a invalid fs, so
2919          * we need to add back the dir index and then check to see if the isize
2920          * is still wrong.
2921          */
2922         while (stage < 3) {
2923                 stage++;
2924                 if (stage == 3 && !err)
2925                         break;
2926
2927                 cache = search_cache_extent(inode_cache, 0);
2928                 while (repair && cache) {
2929                         node = container_of(cache, struct ptr_node, cache);
2930                         rec = node->data;
2931                         cache = next_cache_extent(cache);
2932
2933                         /* Need to free everything up and rescan */
2934                         if (stage == 3) {
2935                                 remove_cache_extent(inode_cache, &node->cache);
2936                                 free(node);
2937                                 free_inode_rec(rec);
2938                                 continue;
2939                         }
2940
2941                         if (list_empty(&rec->backrefs))
2942                                 continue;
2943
2944                         ret = repair_inode_backrefs(root, rec, inode_cache,
2945                                                     stage == 1);
2946                         if (ret < 0) {
2947                                 err = ret;
2948                                 stage = 2;
2949                                 break;
2950                         } if (ret > 0) {
2951                                 err = -EAGAIN;
2952                         }
2953                 }
2954         }
2955         if (err)
2956                 return err;
2957
2958         rec = get_inode_rec(inode_cache, root_dirid, 0);
2959         BUG_ON(IS_ERR(rec));
2960         if (rec) {
2961                 ret = check_root_dir(rec);
2962                 if (ret) {
2963                         fprintf(stderr, "root %llu root dir %llu error\n",
2964                                 (unsigned long long)root->root_key.objectid,
2965                                 (unsigned long long)root_dirid);
2966                         print_inode_error(root, rec);
2967                         error++;
2968                 }
2969         } else {
2970                 if (repair) {
2971                         struct btrfs_trans_handle *trans;
2972
2973                         trans = btrfs_start_transaction(root, 1);
2974                         if (IS_ERR(trans)) {
2975                                 err = PTR_ERR(trans);
2976                                 return err;
2977                         }
2978
2979                         fprintf(stderr,
2980                                 "root %llu missing its root dir, recreating\n",
2981                                 (unsigned long long)root->objectid);
2982
2983                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2984                         BUG_ON(ret);
2985
2986                         btrfs_commit_transaction(trans, root);
2987                         return -EAGAIN;
2988                 }
2989
2990                 fprintf(stderr, "root %llu root dir %llu not found\n",
2991                         (unsigned long long)root->root_key.objectid,
2992                         (unsigned long long)root_dirid);
2993         }
2994
2995         while (1) {
2996                 cache = search_cache_extent(inode_cache, 0);
2997                 if (!cache)
2998                         break;
2999                 node = container_of(cache, struct ptr_node, cache);
3000                 rec = node->data;
3001                 remove_cache_extent(inode_cache, &node->cache);
3002                 free(node);
3003                 if (rec->ino == root_dirid ||
3004                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3005                         free_inode_rec(rec);
3006                         continue;
3007                 }
3008
3009                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3010                         ret = check_orphan_item(root, rec->ino);
3011                         if (ret == 0)
3012                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3013                         if (can_free_inode_rec(rec)) {
3014                                 free_inode_rec(rec);
3015                                 continue;
3016                         }
3017                 }
3018
3019                 if (!rec->found_inode_item)
3020                         rec->errors |= I_ERR_NO_INODE_ITEM;
3021                 if (rec->found_link != rec->nlink)
3022                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3023                 if (repair) {
3024                         ret = try_repair_inode(root, rec);
3025                         if (ret == 0 && can_free_inode_rec(rec)) {
3026                                 free_inode_rec(rec);
3027                                 continue;
3028                         }
3029                         ret = 0;
3030                 }
3031
3032                 if (!(repair && ret == 0))
3033                         error++;
3034                 print_inode_error(root, rec);
3035                 list_for_each_entry(backref, &rec->backrefs, list) {
3036                         if (!backref->found_dir_item)
3037                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3038                         if (!backref->found_dir_index)
3039                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3040                         if (!backref->found_inode_ref)
3041                                 backref->errors |= REF_ERR_NO_INODE_REF;
3042                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3043                                 " namelen %u name %s filetype %d errors %x",
3044                                 (unsigned long long)backref->dir,
3045                                 (unsigned long long)backref->index,
3046                                 backref->namelen, backref->name,
3047                                 backref->filetype, backref->errors);
3048                         print_ref_error(backref->errors);
3049                 }
3050                 free_inode_rec(rec);
3051         }
3052         return (error > 0) ? -1 : 0;
3053 }
3054
3055 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3056                                         u64 objectid)
3057 {
3058         struct cache_extent *cache;
3059         struct root_record *rec = NULL;
3060         int ret;
3061
3062         cache = lookup_cache_extent(root_cache, objectid, 1);
3063         if (cache) {
3064                 rec = container_of(cache, struct root_record, cache);
3065         } else {
3066                 rec = calloc(1, sizeof(*rec));
3067                 if (!rec)
3068                         return ERR_PTR(-ENOMEM);
3069                 rec->objectid = objectid;
3070                 INIT_LIST_HEAD(&rec->backrefs);
3071                 rec->cache.start = objectid;
3072                 rec->cache.size = 1;
3073
3074                 ret = insert_cache_extent(root_cache, &rec->cache);
3075                 if (ret)
3076                         return ERR_PTR(-EEXIST);
3077         }
3078         return rec;
3079 }
3080
3081 static struct root_backref *get_root_backref(struct root_record *rec,
3082                                              u64 ref_root, u64 dir, u64 index,
3083                                              const char *name, int namelen)
3084 {
3085         struct root_backref *backref;
3086
3087         list_for_each_entry(backref, &rec->backrefs, list) {
3088                 if (backref->ref_root != ref_root || backref->dir != dir ||
3089                     backref->namelen != namelen)
3090                         continue;
3091                 if (memcmp(name, backref->name, namelen))
3092                         continue;
3093                 return backref;
3094         }
3095
3096         backref = calloc(1, sizeof(*backref) + namelen + 1);
3097         if (!backref)
3098                 return NULL;
3099         backref->ref_root = ref_root;
3100         backref->dir = dir;
3101         backref->index = index;
3102         backref->namelen = namelen;
3103         memcpy(backref->name, name, namelen);
3104         backref->name[namelen] = '\0';
3105         list_add_tail(&backref->list, &rec->backrefs);
3106         return backref;
3107 }
3108
3109 static void free_root_record(struct cache_extent *cache)
3110 {
3111         struct root_record *rec;
3112         struct root_backref *backref;
3113
3114         rec = container_of(cache, struct root_record, cache);
3115         while (!list_empty(&rec->backrefs)) {
3116                 backref = list_entry(rec->backrefs.next,
3117                                      struct root_backref, list);
3118                 list_del(&backref->list);
3119                 free(backref);
3120         }
3121
3122         kfree(rec);
3123 }
3124
3125 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3126
3127 static int add_root_backref(struct cache_tree *root_cache,
3128                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3129                             const char *name, int namelen,
3130                             int item_type, int errors)
3131 {
3132         struct root_record *rec;
3133         struct root_backref *backref;
3134
3135         rec = get_root_rec(root_cache, root_id);
3136         BUG_ON(IS_ERR(rec));
3137         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3138         BUG_ON(!backref);
3139
3140         backref->errors |= errors;
3141
3142         if (item_type != BTRFS_DIR_ITEM_KEY) {
3143                 if (backref->found_dir_index || backref->found_back_ref ||
3144                     backref->found_forward_ref) {
3145                         if (backref->index != index)
3146                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3147                 } else {
3148                         backref->index = index;
3149                 }
3150         }
3151
3152         if (item_type == BTRFS_DIR_ITEM_KEY) {
3153                 if (backref->found_forward_ref)
3154                         rec->found_ref++;
3155                 backref->found_dir_item = 1;
3156         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3157                 backref->found_dir_index = 1;
3158         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3159                 if (backref->found_forward_ref)
3160                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3161                 else if (backref->found_dir_item)
3162                         rec->found_ref++;
3163                 backref->found_forward_ref = 1;
3164         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3165                 if (backref->found_back_ref)
3166                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3167                 backref->found_back_ref = 1;
3168         } else {
3169                 BUG_ON(1);
3170         }
3171
3172         if (backref->found_forward_ref && backref->found_dir_item)
3173                 backref->reachable = 1;
3174         return 0;
3175 }
3176
3177 static int merge_root_recs(struct btrfs_root *root,
3178                            struct cache_tree *src_cache,
3179                            struct cache_tree *dst_cache)
3180 {
3181         struct cache_extent *cache;
3182         struct ptr_node *node;
3183         struct inode_record *rec;
3184         struct inode_backref *backref;
3185         int ret = 0;
3186
3187         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3188                 free_inode_recs_tree(src_cache);
3189                 return 0;
3190         }
3191
3192         while (1) {
3193                 cache = search_cache_extent(src_cache, 0);
3194                 if (!cache)
3195                         break;
3196                 node = container_of(cache, struct ptr_node, cache);
3197                 rec = node->data;
3198                 remove_cache_extent(src_cache, &node->cache);
3199                 free(node);
3200
3201                 ret = is_child_root(root, root->objectid, rec->ino);
3202                 if (ret < 0)
3203                         break;
3204                 else if (ret == 0)
3205                         goto skip;
3206
3207                 list_for_each_entry(backref, &rec->backrefs, list) {
3208                         BUG_ON(backref->found_inode_ref);
3209                         if (backref->found_dir_item)
3210                                 add_root_backref(dst_cache, rec->ino,
3211                                         root->root_key.objectid, backref->dir,
3212                                         backref->index, backref->name,
3213                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3214                                         backref->errors);
3215                         if (backref->found_dir_index)
3216                                 add_root_backref(dst_cache, rec->ino,
3217                                         root->root_key.objectid, backref->dir,
3218                                         backref->index, backref->name,
3219                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3220                                         backref->errors);
3221                 }
3222 skip:
3223                 free_inode_rec(rec);
3224         }
3225         if (ret < 0)
3226                 return ret;
3227         return 0;
3228 }
3229
3230 static int check_root_refs(struct btrfs_root *root,
3231                            struct cache_tree *root_cache)
3232 {
3233         struct root_record *rec;
3234         struct root_record *ref_root;
3235         struct root_backref *backref;
3236         struct cache_extent *cache;
3237         int loop = 1;
3238         int ret;
3239         int error;
3240         int errors = 0;
3241
3242         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3243         BUG_ON(IS_ERR(rec));
3244         rec->found_ref = 1;
3245
3246         /* fixme: this can not detect circular references */
3247         while (loop) {
3248                 loop = 0;
3249                 cache = search_cache_extent(root_cache, 0);
3250                 while (1) {
3251                         if (!cache)
3252                                 break;
3253                         rec = container_of(cache, struct root_record, cache);
3254                         cache = next_cache_extent(cache);
3255
3256                         if (rec->found_ref == 0)
3257                                 continue;
3258
3259                         list_for_each_entry(backref, &rec->backrefs, list) {
3260                                 if (!backref->reachable)
3261                                         continue;
3262
3263                                 ref_root = get_root_rec(root_cache,
3264                                                         backref->ref_root);
3265                                 BUG_ON(IS_ERR(ref_root));
3266                                 if (ref_root->found_ref > 0)
3267                                         continue;
3268
3269                                 backref->reachable = 0;
3270                                 rec->found_ref--;
3271                                 if (rec->found_ref == 0)
3272                                         loop = 1;
3273                         }
3274                 }
3275         }
3276
3277         cache = search_cache_extent(root_cache, 0);
3278         while (1) {
3279                 if (!cache)
3280                         break;
3281                 rec = container_of(cache, struct root_record, cache);
3282                 cache = next_cache_extent(cache);
3283
3284                 if (rec->found_ref == 0 &&
3285                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3286                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3287                         ret = check_orphan_item(root->fs_info->tree_root,
3288                                                 rec->objectid);
3289                         if (ret == 0)
3290                                 continue;
3291
3292                         /*
3293                          * If we don't have a root item then we likely just have
3294                          * a dir item in a snapshot for this root but no actual
3295                          * ref key or anything so it's meaningless.
3296                          */
3297                         if (!rec->found_root_item)
3298                                 continue;
3299                         errors++;
3300                         fprintf(stderr, "fs tree %llu not referenced\n",
3301                                 (unsigned long long)rec->objectid);
3302                 }
3303
3304                 error = 0;
3305                 if (rec->found_ref > 0 && !rec->found_root_item)
3306                         error = 1;
3307                 list_for_each_entry(backref, &rec->backrefs, list) {
3308                         if (!backref->found_dir_item)
3309                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3310                         if (!backref->found_dir_index)
3311                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3312                         if (!backref->found_back_ref)
3313                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3314                         if (!backref->found_forward_ref)
3315                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3316                         if (backref->reachable && backref->errors)
3317                                 error = 1;
3318                 }
3319                 if (!error)
3320                         continue;
3321
3322                 errors++;
3323                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3324                         (unsigned long long)rec->objectid, rec->found_ref,
3325                          rec->found_root_item ? "" : "not found");
3326
3327                 list_for_each_entry(backref, &rec->backrefs, list) {
3328                         if (!backref->reachable)
3329                                 continue;
3330                         if (!backref->errors && rec->found_root_item)
3331                                 continue;
3332                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3333                                 " index %llu namelen %u name %s errors %x\n",
3334                                 (unsigned long long)backref->ref_root,
3335                                 (unsigned long long)backref->dir,
3336                                 (unsigned long long)backref->index,
3337                                 backref->namelen, backref->name,
3338                                 backref->errors);
3339                         print_ref_error(backref->errors);
3340                 }
3341         }
3342         return errors > 0 ? 1 : 0;
3343 }
3344
3345 static int process_root_ref(struct extent_buffer *eb, int slot,
3346                             struct btrfs_key *key,
3347                             struct cache_tree *root_cache)
3348 {
3349         u64 dirid;
3350         u64 index;
3351         u32 len;
3352         u32 name_len;
3353         struct btrfs_root_ref *ref;
3354         char namebuf[BTRFS_NAME_LEN];
3355         int error;
3356
3357         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3358
3359         dirid = btrfs_root_ref_dirid(eb, ref);
3360         index = btrfs_root_ref_sequence(eb, ref);
3361         name_len = btrfs_root_ref_name_len(eb, ref);
3362
3363         if (name_len <= BTRFS_NAME_LEN) {
3364                 len = name_len;
3365                 error = 0;
3366         } else {
3367                 len = BTRFS_NAME_LEN;
3368                 error = REF_ERR_NAME_TOO_LONG;
3369         }
3370         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3371
3372         if (key->type == BTRFS_ROOT_REF_KEY) {
3373                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3374                                  index, namebuf, len, key->type, error);
3375         } else {
3376                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3377                                  index, namebuf, len, key->type, error);
3378         }
3379         return 0;
3380 }
3381
3382 static void free_corrupt_block(struct cache_extent *cache)
3383 {
3384         struct btrfs_corrupt_block *corrupt;
3385
3386         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3387         free(corrupt);
3388 }
3389
3390 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3391
3392 /*
3393  * Repair the btree of the given root.
3394  *
3395  * The fix is to remove the node key in corrupt_blocks cache_tree.
3396  * and rebalance the tree.
3397  * After the fix, the btree should be writeable.
3398  */
3399 static int repair_btree(struct btrfs_root *root,
3400                         struct cache_tree *corrupt_blocks)
3401 {
3402         struct btrfs_trans_handle *trans;
3403         struct btrfs_path *path;
3404         struct btrfs_corrupt_block *corrupt;
3405         struct cache_extent *cache;
3406         struct btrfs_key key;
3407         u64 offset;
3408         int level;
3409         int ret = 0;
3410
3411         if (cache_tree_empty(corrupt_blocks))
3412                 return 0;
3413
3414         path = btrfs_alloc_path();
3415         if (!path)
3416                 return -ENOMEM;
3417
3418         trans = btrfs_start_transaction(root, 1);
3419         if (IS_ERR(trans)) {
3420                 ret = PTR_ERR(trans);
3421                 fprintf(stderr, "Error starting transaction: %s\n",
3422                         strerror(-ret));
3423                 goto out_free_path;
3424         }
3425         cache = first_cache_extent(corrupt_blocks);
3426         while (cache) {
3427                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3428                                        cache);
3429                 level = corrupt->level;
3430                 path->lowest_level = level;
3431                 key.objectid = corrupt->key.objectid;
3432                 key.type = corrupt->key.type;
3433                 key.offset = corrupt->key.offset;
3434
3435                 /*
3436                  * Here we don't want to do any tree balance, since it may
3437                  * cause a balance with corrupted brother leaf/node,
3438                  * so ins_len set to 0 here.
3439                  * Balance will be done after all corrupt node/leaf is deleted.
3440                  */
3441                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3442                 if (ret < 0)
3443                         goto out;
3444                 offset = btrfs_node_blockptr(path->nodes[level],
3445                                              path->slots[level]);
3446
3447                 /* Remove the ptr */
3448                 ret = btrfs_del_ptr(trans, root, path, level,
3449                                     path->slots[level]);
3450                 if (ret < 0)
3451                         goto out;
3452                 /*
3453                  * Remove the corresponding extent
3454                  * return value is not concerned.
3455                  */
3456                 btrfs_release_path(path);
3457                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3458                                         0, root->root_key.objectid,
3459                                         level - 1, 0);
3460                 cache = next_cache_extent(cache);
3461         }
3462
3463         /* Balance the btree using btrfs_search_slot() */
3464         cache = first_cache_extent(corrupt_blocks);
3465         while (cache) {
3466                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3467                                        cache);
3468                 memcpy(&key, &corrupt->key, sizeof(key));
3469                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3470                 if (ret < 0)
3471                         goto out;
3472                 /* return will always >0 since it won't find the item */
3473                 ret = 0;
3474                 btrfs_release_path(path);
3475                 cache = next_cache_extent(cache);
3476         }
3477 out:
3478         btrfs_commit_transaction(trans, root);
3479 out_free_path:
3480         btrfs_free_path(path);
3481         return ret;
3482 }
3483
3484 static int check_fs_root(struct btrfs_root *root,
3485                          struct cache_tree *root_cache,
3486                          struct walk_control *wc)
3487 {
3488         int ret = 0;
3489         int err = 0;
3490         int wret;
3491         int level;
3492         struct btrfs_path path;
3493         struct shared_node root_node;
3494         struct root_record *rec;
3495         struct btrfs_root_item *root_item = &root->root_item;
3496         struct cache_tree corrupt_blocks;
3497         struct orphan_data_extent *orphan;
3498         struct orphan_data_extent *tmp;
3499         enum btrfs_tree_block_status status;
3500
3501         /*
3502          * Reuse the corrupt_block cache tree to record corrupted tree block
3503          *
3504          * Unlike the usage in extent tree check, here we do it in a per
3505          * fs/subvol tree base.
3506          */
3507         cache_tree_init(&corrupt_blocks);
3508         root->fs_info->corrupt_blocks = &corrupt_blocks;
3509
3510         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3511                 rec = get_root_rec(root_cache, root->root_key.objectid);
3512                 BUG_ON(IS_ERR(rec));
3513                 if (btrfs_root_refs(root_item) > 0)
3514                         rec->found_root_item = 1;
3515         }
3516
3517         btrfs_init_path(&path);
3518         memset(&root_node, 0, sizeof(root_node));
3519         cache_tree_init(&root_node.root_cache);
3520         cache_tree_init(&root_node.inode_cache);
3521
3522         /* Move the orphan extent record to corresponding inode_record */
3523         list_for_each_entry_safe(orphan, tmp,
3524                                  &root->orphan_data_extents, list) {
3525                 struct inode_record *inode;
3526
3527                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3528                                       1);
3529                 BUG_ON(IS_ERR(inode));
3530                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3531                 list_move(&orphan->list, &inode->orphan_extents);
3532         }
3533
3534         level = btrfs_header_level(root->node);
3535         memset(wc->nodes, 0, sizeof(wc->nodes));
3536         wc->nodes[level] = &root_node;
3537         wc->active_node = level;
3538         wc->root_level = level;
3539
3540         /* We may not have checked the root block, lets do that now */
3541         if (btrfs_is_leaf(root->node))
3542                 status = btrfs_check_leaf(root, NULL, root->node);
3543         else
3544                 status = btrfs_check_node(root, NULL, root->node);
3545         if (status != BTRFS_TREE_BLOCK_CLEAN)
3546                 return -EIO;
3547
3548         if (btrfs_root_refs(root_item) > 0 ||
3549             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3550                 path.nodes[level] = root->node;
3551                 extent_buffer_get(root->node);
3552                 path.slots[level] = 0;
3553         } else {
3554                 struct btrfs_key key;
3555                 struct btrfs_disk_key found_key;
3556
3557                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3558                 level = root_item->drop_level;
3559                 path.lowest_level = level;
3560                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3561                 if (wret < 0)
3562                         goto skip_walking;
3563                 btrfs_node_key(path.nodes[level], &found_key,
3564                                 path.slots[level]);
3565                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3566                                         sizeof(found_key)));
3567         }
3568
3569         while (1) {
3570                 wret = walk_down_tree(root, &path, wc, &level);
3571                 if (wret < 0)
3572                         ret = wret;
3573                 if (wret != 0)
3574                         break;
3575
3576                 wret = walk_up_tree(root, &path, wc, &level);
3577                 if (wret < 0)
3578                         ret = wret;
3579                 if (wret != 0)
3580                         break;
3581         }
3582 skip_walking:
3583         btrfs_release_path(&path);
3584
3585         if (!cache_tree_empty(&corrupt_blocks)) {
3586                 struct cache_extent *cache;
3587                 struct btrfs_corrupt_block *corrupt;
3588
3589                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3590                        root->root_key.objectid);
3591                 cache = first_cache_extent(&corrupt_blocks);
3592                 while (cache) {
3593                         corrupt = container_of(cache,
3594                                                struct btrfs_corrupt_block,
3595                                                cache);
3596                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3597                                cache->start, corrupt->level,
3598                                corrupt->key.objectid, corrupt->key.type,
3599                                corrupt->key.offset);
3600                         cache = next_cache_extent(cache);
3601                 }
3602                 if (repair) {
3603                         printf("Try to repair the btree for root %llu\n",
3604                                root->root_key.objectid);
3605                         ret = repair_btree(root, &corrupt_blocks);
3606                         if (ret < 0)
3607                                 fprintf(stderr, "Failed to repair btree: %s\n",
3608                                         strerror(-ret));
3609                         if (!ret)
3610                                 printf("Btree for root %llu is fixed\n",
3611                                        root->root_key.objectid);
3612                 }
3613         }
3614
3615         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3616         if (err < 0)
3617                 ret = err;
3618
3619         if (root_node.current) {
3620                 root_node.current->checked = 1;
3621                 maybe_free_inode_rec(&root_node.inode_cache,
3622                                 root_node.current);
3623         }
3624
3625         err = check_inode_recs(root, &root_node.inode_cache);
3626         if (!ret)
3627                 ret = err;
3628
3629         free_corrupt_blocks_tree(&corrupt_blocks);
3630         root->fs_info->corrupt_blocks = NULL;
3631         free_orphan_data_extents(&root->orphan_data_extents);
3632         return ret;
3633 }
3634
3635 static int fs_root_objectid(u64 objectid)
3636 {
3637         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3638             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3639                 return 1;
3640         return is_fstree(objectid);
3641 }
3642
3643 static int check_fs_roots(struct btrfs_root *root,
3644                           struct cache_tree *root_cache)
3645 {
3646         struct btrfs_path path;
3647         struct btrfs_key key;
3648         struct walk_control wc;
3649         struct extent_buffer *leaf, *tree_node;
3650         struct btrfs_root *tmp_root;
3651         struct btrfs_root *tree_root = root->fs_info->tree_root;
3652         int ret;
3653         int err = 0;
3654
3655         if (ctx.progress_enabled) {
3656                 ctx.tp = TASK_FS_ROOTS;
3657                 task_start(ctx.info);
3658         }
3659
3660         /*
3661          * Just in case we made any changes to the extent tree that weren't
3662          * reflected into the free space cache yet.
3663          */
3664         if (repair)
3665                 reset_cached_block_groups(root->fs_info);
3666         memset(&wc, 0, sizeof(wc));
3667         cache_tree_init(&wc.shared);
3668         btrfs_init_path(&path);
3669
3670 again:
3671         key.offset = 0;
3672         key.objectid = 0;
3673         key.type = BTRFS_ROOT_ITEM_KEY;
3674         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3675         if (ret < 0) {
3676                 err = 1;
3677                 goto out;
3678         }
3679         tree_node = tree_root->node;
3680         while (1) {
3681                 if (tree_node != tree_root->node) {
3682                         free_root_recs_tree(root_cache);
3683                         btrfs_release_path(&path);
3684                         goto again;
3685                 }
3686                 leaf = path.nodes[0];
3687                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3688                         ret = btrfs_next_leaf(tree_root, &path);
3689                         if (ret) {
3690                                 if (ret < 0)
3691                                         err = 1;
3692                                 break;
3693                         }
3694                         leaf = path.nodes[0];
3695                 }
3696                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3697                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3698                     fs_root_objectid(key.objectid)) {
3699                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3700                                 tmp_root = btrfs_read_fs_root_no_cache(
3701                                                 root->fs_info, &key);
3702                         } else {
3703                                 key.offset = (u64)-1;
3704                                 tmp_root = btrfs_read_fs_root(
3705                                                 root->fs_info, &key);
3706                         }
3707                         if (IS_ERR(tmp_root)) {
3708                                 err = 1;
3709                                 goto next;
3710                         }
3711                         ret = check_fs_root(tmp_root, root_cache, &wc);
3712                         if (ret == -EAGAIN) {
3713                                 free_root_recs_tree(root_cache);
3714                                 btrfs_release_path(&path);
3715                                 goto again;
3716                         }
3717                         if (ret)
3718                                 err = 1;
3719                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3720                                 btrfs_free_fs_root(tmp_root);
3721                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3722                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3723                         process_root_ref(leaf, path.slots[0], &key,
3724                                          root_cache);
3725                 }
3726 next:
3727                 path.slots[0]++;
3728         }
3729 out:
3730         btrfs_release_path(&path);
3731         if (err)
3732                 free_extent_cache_tree(&wc.shared);
3733         if (!cache_tree_empty(&wc.shared))
3734                 fprintf(stderr, "warning line %d\n", __LINE__);
3735
3736         task_stop(ctx.info);
3737
3738         return err;
3739 }
3740
3741 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3742 {
3743         struct list_head *cur = rec->backrefs.next;
3744         struct extent_backref *back;
3745         struct tree_backref *tback;
3746         struct data_backref *dback;
3747         u64 found = 0;
3748         int err = 0;
3749
3750         while(cur != &rec->backrefs) {
3751                 back = list_entry(cur, struct extent_backref, list);
3752                 cur = cur->next;
3753                 if (!back->found_extent_tree) {
3754                         err = 1;
3755                         if (!print_errs)
3756                                 goto out;
3757                         if (back->is_data) {
3758                                 dback = (struct data_backref *)back;
3759                                 fprintf(stderr, "Backref %llu %s %llu"
3760                                         " owner %llu offset %llu num_refs %lu"
3761                                         " not found in extent tree\n",
3762                                         (unsigned long long)rec->start,
3763                                         back->full_backref ?
3764                                         "parent" : "root",
3765                                         back->full_backref ?
3766                                         (unsigned long long)dback->parent:
3767                                         (unsigned long long)dback->root,
3768                                         (unsigned long long)dback->owner,
3769                                         (unsigned long long)dback->offset,
3770                                         (unsigned long)dback->num_refs);
3771                         } else {
3772                                 tback = (struct tree_backref *)back;
3773                                 fprintf(stderr, "Backref %llu parent %llu"
3774                                         " root %llu not found in extent tree\n",
3775                                         (unsigned long long)rec->start,
3776                                         (unsigned long long)tback->parent,
3777                                         (unsigned long long)tback->root);
3778                         }
3779                 }
3780                 if (!back->is_data && !back->found_ref) {
3781                         err = 1;
3782                         if (!print_errs)
3783                                 goto out;
3784                         tback = (struct tree_backref *)back;
3785                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3786                                 (unsigned long long)rec->start,
3787                                 back->full_backref ? "parent" : "root",
3788                                 back->full_backref ?
3789                                 (unsigned long long)tback->parent :
3790                                 (unsigned long long)tback->root, back);
3791                 }
3792                 if (back->is_data) {
3793                         dback = (struct data_backref *)back;
3794                         if (dback->found_ref != dback->num_refs) {
3795                                 err = 1;
3796                                 if (!print_errs)
3797                                         goto out;
3798                                 fprintf(stderr, "Incorrect local backref count"
3799                                         " on %llu %s %llu owner %llu"
3800                                         " offset %llu found %u wanted %u back %p\n",
3801                                         (unsigned long long)rec->start,
3802                                         back->full_backref ?
3803                                         "parent" : "root",
3804                                         back->full_backref ?
3805                                         (unsigned long long)dback->parent:
3806                                         (unsigned long long)dback->root,
3807                                         (unsigned long long)dback->owner,
3808                                         (unsigned long long)dback->offset,
3809                                         dback->found_ref, dback->num_refs, back);
3810                         }
3811                         if (dback->disk_bytenr != rec->start) {
3812                                 err = 1;
3813                                 if (!print_errs)
3814                                         goto out;
3815                                 fprintf(stderr, "Backref disk bytenr does not"
3816                                         " match extent record, bytenr=%llu, "
3817                                         "ref bytenr=%llu\n",
3818                                         (unsigned long long)rec->start,
3819                                         (unsigned long long)dback->disk_bytenr);
3820                         }
3821
3822                         if (dback->bytes != rec->nr) {
3823                                 err = 1;
3824                                 if (!print_errs)
3825                                         goto out;
3826                                 fprintf(stderr, "Backref bytes do not match "
3827                                         "extent backref, bytenr=%llu, ref "
3828                                         "bytes=%llu, backref bytes=%llu\n",
3829                                         (unsigned long long)rec->start,
3830                                         (unsigned long long)rec->nr,
3831                                         (unsigned long long)dback->bytes);
3832                         }
3833                 }
3834                 if (!back->is_data) {
3835                         found += 1;
3836                 } else {
3837                         dback = (struct data_backref *)back;
3838                         found += dback->found_ref;
3839                 }
3840         }
3841         if (found != rec->refs) {
3842                 err = 1;
3843                 if (!print_errs)
3844                         goto out;
3845                 fprintf(stderr, "Incorrect global backref count "
3846                         "on %llu found %llu wanted %llu\n",
3847                         (unsigned long long)rec->start,
3848                         (unsigned long long)found,
3849                         (unsigned long long)rec->refs);
3850         }
3851 out:
3852         return err;
3853 }
3854
3855 static int free_all_extent_backrefs(struct extent_record *rec)
3856 {
3857         struct extent_backref *back;
3858         struct list_head *cur;
3859         while (!list_empty(&rec->backrefs)) {
3860                 cur = rec->backrefs.next;
3861                 back = list_entry(cur, struct extent_backref, list);
3862                 list_del(cur);
3863                 free(back);
3864         }
3865         return 0;
3866 }
3867
3868 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3869                                      struct cache_tree *extent_cache)
3870 {
3871         struct cache_extent *cache;
3872         struct extent_record *rec;
3873
3874         while (1) {
3875                 cache = first_cache_extent(extent_cache);
3876                 if (!cache)
3877                         break;
3878                 rec = container_of(cache, struct extent_record, cache);
3879                 remove_cache_extent(extent_cache, cache);
3880                 free_all_extent_backrefs(rec);
3881                 free(rec);
3882         }
3883 }
3884
3885 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3886                                  struct extent_record *rec)
3887 {
3888         if (rec->content_checked && rec->owner_ref_checked &&
3889             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3890             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3891             !rec->bad_full_backref && !rec->crossing_stripes &&
3892             !rec->wrong_chunk_type) {
3893                 remove_cache_extent(extent_cache, &rec->cache);
3894                 free_all_extent_backrefs(rec);
3895                 list_del_init(&rec->list);
3896                 free(rec);
3897         }
3898         return 0;
3899 }
3900
3901 static int check_owner_ref(struct btrfs_root *root,
3902                             struct extent_record *rec,
3903                             struct extent_buffer *buf)
3904 {
3905         struct extent_backref *node;
3906         struct tree_backref *back;
3907         struct btrfs_root *ref_root;
3908         struct btrfs_key key;
3909         struct btrfs_path path;
3910         struct extent_buffer *parent;
3911         int level;
3912         int found = 0;
3913         int ret;
3914
3915         list_for_each_entry(node, &rec->backrefs, list) {
3916                 if (node->is_data)
3917                         continue;
3918                 if (!node->found_ref)
3919                         continue;
3920                 if (node->full_backref)
3921                         continue;
3922                 back = (struct tree_backref *)node;
3923                 if (btrfs_header_owner(buf) == back->root)
3924                         return 0;
3925         }
3926         BUG_ON(rec->is_root);
3927
3928         /* try to find the block by search corresponding fs tree */
3929         key.objectid = btrfs_header_owner(buf);
3930         key.type = BTRFS_ROOT_ITEM_KEY;
3931         key.offset = (u64)-1;
3932
3933         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3934         if (IS_ERR(ref_root))
3935                 return 1;
3936
3937         level = btrfs_header_level(buf);
3938         if (level == 0)
3939                 btrfs_item_key_to_cpu(buf, &key, 0);
3940         else
3941                 btrfs_node_key_to_cpu(buf, &key, 0);
3942
3943         btrfs_init_path(&path);
3944         path.lowest_level = level + 1;
3945         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3946         if (ret < 0)
3947                 return 0;
3948
3949         parent = path.nodes[level + 1];
3950         if (parent && buf->start == btrfs_node_blockptr(parent,
3951                                                         path.slots[level + 1]))
3952                 found = 1;
3953
3954         btrfs_release_path(&path);
3955         return found ? 0 : 1;
3956 }
3957
3958 static int is_extent_tree_record(struct extent_record *rec)
3959 {
3960         struct list_head *cur = rec->backrefs.next;
3961         struct extent_backref *node;
3962         struct tree_backref *back;
3963         int is_extent = 0;
3964
3965         while(cur != &rec->backrefs) {
3966                 node = list_entry(cur, struct extent_backref, list);
3967                 cur = cur->next;
3968                 if (node->is_data)
3969                         return 0;
3970                 back = (struct tree_backref *)node;
3971                 if (node->full_backref)
3972                         return 0;
3973                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3974                         is_extent = 1;
3975         }
3976         return is_extent;
3977 }
3978
3979
3980 static int record_bad_block_io(struct btrfs_fs_info *info,
3981                                struct cache_tree *extent_cache,
3982                                u64 start, u64 len)
3983 {
3984         struct extent_record *rec;
3985         struct cache_extent *cache;
3986         struct btrfs_key key;
3987
3988         cache = lookup_cache_extent(extent_cache, start, len);
3989         if (!cache)
3990                 return 0;
3991
3992         rec = container_of(cache, struct extent_record, cache);
3993         if (!is_extent_tree_record(rec))
3994                 return 0;
3995
3996         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
3997         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
3998 }
3999
4000 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4001                        struct extent_buffer *buf, int slot)
4002 {
4003         if (btrfs_header_level(buf)) {
4004                 struct btrfs_key_ptr ptr1, ptr2;
4005
4006                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4007                                    sizeof(struct btrfs_key_ptr));
4008                 read_extent_buffer(buf, &ptr2,
4009                                    btrfs_node_key_ptr_offset(slot + 1),
4010                                    sizeof(struct btrfs_key_ptr));
4011                 write_extent_buffer(buf, &ptr1,
4012                                     btrfs_node_key_ptr_offset(slot + 1),
4013                                     sizeof(struct btrfs_key_ptr));
4014                 write_extent_buffer(buf, &ptr2,
4015                                     btrfs_node_key_ptr_offset(slot),
4016                                     sizeof(struct btrfs_key_ptr));
4017                 if (slot == 0) {
4018                         struct btrfs_disk_key key;
4019                         btrfs_node_key(buf, &key, 0);
4020                         btrfs_fixup_low_keys(root, path, &key,
4021                                              btrfs_header_level(buf) + 1);
4022                 }
4023         } else {
4024                 struct btrfs_item *item1, *item2;
4025                 struct btrfs_key k1, k2;
4026                 char *item1_data, *item2_data;
4027                 u32 item1_offset, item2_offset, item1_size, item2_size;
4028
4029                 item1 = btrfs_item_nr(slot);
4030                 item2 = btrfs_item_nr(slot + 1);
4031                 btrfs_item_key_to_cpu(buf, &k1, slot);
4032                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4033                 item1_offset = btrfs_item_offset(buf, item1);
4034                 item2_offset = btrfs_item_offset(buf, item2);
4035                 item1_size = btrfs_item_size(buf, item1);
4036                 item2_size = btrfs_item_size(buf, item2);
4037
4038                 item1_data = malloc(item1_size);
4039                 if (!item1_data)
4040                         return -ENOMEM;
4041                 item2_data = malloc(item2_size);
4042                 if (!item2_data) {
4043                         free(item1_data);
4044                         return -ENOMEM;
4045                 }
4046
4047                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4048                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4049
4050                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4051                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4052                 free(item1_data);
4053                 free(item2_data);
4054
4055                 btrfs_set_item_offset(buf, item1, item2_offset);
4056                 btrfs_set_item_offset(buf, item2, item1_offset);
4057                 btrfs_set_item_size(buf, item1, item2_size);
4058                 btrfs_set_item_size(buf, item2, item1_size);
4059
4060                 path->slots[0] = slot;
4061                 btrfs_set_item_key_unsafe(root, path, &k2);
4062                 path->slots[0] = slot + 1;
4063                 btrfs_set_item_key_unsafe(root, path, &k1);
4064         }
4065         return 0;
4066 }
4067
4068 static int fix_key_order(struct btrfs_trans_handle *trans,
4069                          struct btrfs_root *root,
4070                          struct btrfs_path *path)
4071 {
4072         struct extent_buffer *buf;
4073         struct btrfs_key k1, k2;
4074         int i;
4075         int level = path->lowest_level;
4076         int ret = -EIO;
4077
4078         buf = path->nodes[level];
4079         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4080                 if (level) {
4081                         btrfs_node_key_to_cpu(buf, &k1, i);
4082                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4083                 } else {
4084                         btrfs_item_key_to_cpu(buf, &k1, i);
4085                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4086                 }
4087                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4088                         continue;
4089                 ret = swap_values(root, path, buf, i);
4090                 if (ret)
4091                         break;
4092                 btrfs_mark_buffer_dirty(buf);
4093                 i = 0;
4094         }
4095         return ret;
4096 }
4097
4098 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4099                              struct btrfs_root *root,
4100                              struct btrfs_path *path,
4101                              struct extent_buffer *buf, int slot)
4102 {
4103         struct btrfs_key key;
4104         int nritems = btrfs_header_nritems(buf);
4105
4106         btrfs_item_key_to_cpu(buf, &key, slot);
4107
4108         /* These are all the keys we can deal with missing. */
4109         if (key.type != BTRFS_DIR_INDEX_KEY &&
4110             key.type != BTRFS_EXTENT_ITEM_KEY &&
4111             key.type != BTRFS_METADATA_ITEM_KEY &&
4112             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4113             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4114                 return -1;
4115
4116         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4117                (unsigned long long)key.objectid, key.type,
4118                (unsigned long long)key.offset, slot, buf->start);
4119         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4120                               btrfs_item_nr_offset(slot + 1),
4121                               sizeof(struct btrfs_item) *
4122                               (nritems - slot - 1));
4123         btrfs_set_header_nritems(buf, nritems - 1);
4124         if (slot == 0) {
4125                 struct btrfs_disk_key disk_key;
4126
4127                 btrfs_item_key(buf, &disk_key, 0);
4128                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4129         }
4130         btrfs_mark_buffer_dirty(buf);
4131         return 0;
4132 }
4133
4134 static int fix_item_offset(struct btrfs_trans_handle *trans,
4135                            struct btrfs_root *root,
4136                            struct btrfs_path *path)
4137 {
4138         struct extent_buffer *buf;
4139         int i;
4140         int ret = 0;
4141
4142         /* We should only get this for leaves */
4143         BUG_ON(path->lowest_level);
4144         buf = path->nodes[0];
4145 again:
4146         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4147                 unsigned int shift = 0, offset;
4148
4149                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4150                     BTRFS_LEAF_DATA_SIZE(root)) {
4151                         if (btrfs_item_end_nr(buf, i) >
4152                             BTRFS_LEAF_DATA_SIZE(root)) {
4153                                 ret = delete_bogus_item(trans, root, path,
4154                                                         buf, i);
4155                                 if (!ret)
4156                                         goto again;
4157                                 fprintf(stderr, "item is off the end of the "
4158                                         "leaf, can't fix\n");
4159                                 ret = -EIO;
4160                                 break;
4161                         }
4162                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4163                                 btrfs_item_end_nr(buf, i);
4164                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4165                            btrfs_item_offset_nr(buf, i - 1)) {
4166                         if (btrfs_item_end_nr(buf, i) >
4167                             btrfs_item_offset_nr(buf, i - 1)) {
4168                                 ret = delete_bogus_item(trans, root, path,
4169                                                         buf, i);
4170                                 if (!ret)
4171                                         goto again;
4172                                 fprintf(stderr, "items overlap, can't fix\n");
4173                                 ret = -EIO;
4174                                 break;
4175                         }
4176                         shift = btrfs_item_offset_nr(buf, i - 1) -
4177                                 btrfs_item_end_nr(buf, i);
4178                 }
4179                 if (!shift)
4180                         continue;
4181
4182                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4183                        i, shift, (unsigned long long)buf->start);
4184                 offset = btrfs_item_offset_nr(buf, i);
4185                 memmove_extent_buffer(buf,
4186                                       btrfs_leaf_data(buf) + offset + shift,
4187                                       btrfs_leaf_data(buf) + offset,
4188                                       btrfs_item_size_nr(buf, i));
4189                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4190                                       offset + shift);
4191                 btrfs_mark_buffer_dirty(buf);
4192         }
4193
4194         /*
4195          * We may have moved things, in which case we want to exit so we don't
4196          * write those changes out.  Once we have proper abort functionality in
4197          * progs this can be changed to something nicer.
4198          */
4199         BUG_ON(ret);
4200         return ret;
4201 }
4202
4203 /*
4204  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4205  * then just return -EIO.
4206  */
4207 static int try_to_fix_bad_block(struct btrfs_root *root,
4208                                 struct extent_buffer *buf,
4209                                 enum btrfs_tree_block_status status)
4210 {
4211         struct btrfs_trans_handle *trans;
4212         struct ulist *roots;
4213         struct ulist_node *node;
4214         struct btrfs_root *search_root;
4215         struct btrfs_path *path;
4216         struct ulist_iterator iter;
4217         struct btrfs_key root_key, key;
4218         int ret;
4219
4220         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4221             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4222                 return -EIO;
4223
4224         path = btrfs_alloc_path();
4225         if (!path)
4226                 return -EIO;
4227
4228         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4229                                    0, &roots);
4230         if (ret) {
4231                 btrfs_free_path(path);
4232                 return -EIO;
4233         }
4234
4235         ULIST_ITER_INIT(&iter);
4236         while ((node = ulist_next(roots, &iter))) {
4237                 root_key.objectid = node->val;
4238                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4239                 root_key.offset = (u64)-1;
4240
4241                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4242                 if (IS_ERR(root)) {
4243                         ret = -EIO;
4244                         break;
4245                 }
4246
4247
4248                 trans = btrfs_start_transaction(search_root, 0);
4249                 if (IS_ERR(trans)) {
4250                         ret = PTR_ERR(trans);
4251                         break;
4252                 }
4253
4254                 path->lowest_level = btrfs_header_level(buf);
4255                 path->skip_check_block = 1;
4256                 if (path->lowest_level)
4257                         btrfs_node_key_to_cpu(buf, &key, 0);
4258                 else
4259                         btrfs_item_key_to_cpu(buf, &key, 0);
4260                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4261                 if (ret) {
4262                         ret = -EIO;
4263                         btrfs_commit_transaction(trans, search_root);
4264                         break;
4265                 }
4266                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4267                         ret = fix_key_order(trans, search_root, path);
4268                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4269                         ret = fix_item_offset(trans, search_root, path);
4270                 if (ret) {
4271                         btrfs_commit_transaction(trans, search_root);
4272                         break;
4273                 }
4274                 btrfs_release_path(path);
4275                 btrfs_commit_transaction(trans, search_root);
4276         }
4277         ulist_free(roots);
4278         btrfs_free_path(path);
4279         return ret;
4280 }
4281
4282 static int check_block(struct btrfs_root *root,
4283                        struct cache_tree *extent_cache,
4284                        struct extent_buffer *buf, u64 flags)
4285 {
4286         struct extent_record *rec;
4287         struct cache_extent *cache;
4288         struct btrfs_key key;
4289         enum btrfs_tree_block_status status;
4290         int ret = 0;
4291         int level;
4292
4293         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4294         if (!cache)
4295                 return 1;
4296         rec = container_of(cache, struct extent_record, cache);
4297         rec->generation = btrfs_header_generation(buf);
4298
4299         level = btrfs_header_level(buf);
4300         if (btrfs_header_nritems(buf) > 0) {
4301
4302                 if (level == 0)
4303                         btrfs_item_key_to_cpu(buf, &key, 0);
4304                 else
4305                         btrfs_node_key_to_cpu(buf, &key, 0);
4306
4307                 rec->info_objectid = key.objectid;
4308         }
4309         rec->info_level = level;
4310
4311         if (btrfs_is_leaf(buf))
4312                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4313         else
4314                 status = btrfs_check_node(root, &rec->parent_key, buf);
4315
4316         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4317                 if (repair)
4318                         status = try_to_fix_bad_block(root, buf, status);
4319                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4320                         ret = -EIO;
4321                         fprintf(stderr, "bad block %llu\n",
4322                                 (unsigned long long)buf->start);
4323                 } else {
4324                         /*
4325                          * Signal to callers we need to start the scan over
4326                          * again since we'll have cow'ed blocks.
4327                          */
4328                         ret = -EAGAIN;
4329                 }
4330         } else {
4331                 rec->content_checked = 1;
4332                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4333                         rec->owner_ref_checked = 1;
4334                 else {
4335                         ret = check_owner_ref(root, rec, buf);
4336                         if (!ret)
4337                                 rec->owner_ref_checked = 1;
4338                 }
4339         }
4340         if (!ret)
4341                 maybe_free_extent_rec(extent_cache, rec);
4342         return ret;
4343 }
4344
4345 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4346                                                 u64 parent, u64 root)
4347 {
4348         struct list_head *cur = rec->backrefs.next;
4349         struct extent_backref *node;
4350         struct tree_backref *back;
4351
4352         while(cur != &rec->backrefs) {
4353                 node = list_entry(cur, struct extent_backref, list);
4354                 cur = cur->next;
4355                 if (node->is_data)
4356                         continue;
4357                 back = (struct tree_backref *)node;
4358                 if (parent > 0) {
4359                         if (!node->full_backref)
4360                                 continue;
4361                         if (parent == back->parent)
4362                                 return back;
4363                 } else {
4364                         if (node->full_backref)
4365                                 continue;
4366                         if (back->root == root)
4367                                 return back;
4368                 }
4369         }
4370         return NULL;
4371 }
4372
4373 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4374                                                 u64 parent, u64 root)
4375 {
4376         struct tree_backref *ref = malloc(sizeof(*ref));
4377
4378         if (!ref)
4379                 return NULL;
4380         memset(&ref->node, 0, sizeof(ref->node));
4381         if (parent > 0) {
4382                 ref->parent = parent;
4383                 ref->node.full_backref = 1;
4384         } else {
4385                 ref->root = root;
4386                 ref->node.full_backref = 0;
4387         }
4388         list_add_tail(&ref->node.list, &rec->backrefs);
4389
4390         return ref;
4391 }
4392
4393 static struct data_backref *find_data_backref(struct extent_record *rec,
4394                                                 u64 parent, u64 root,
4395                                                 u64 owner, u64 offset,
4396                                                 int found_ref,
4397                                                 u64 disk_bytenr, u64 bytes)
4398 {
4399         struct list_head *cur = rec->backrefs.next;
4400         struct extent_backref *node;
4401         struct data_backref *back;
4402
4403         while(cur != &rec->backrefs) {
4404                 node = list_entry(cur, struct extent_backref, list);
4405                 cur = cur->next;
4406                 if (!node->is_data)
4407                         continue;
4408                 back = (struct data_backref *)node;
4409                 if (parent > 0) {
4410                         if (!node->full_backref)
4411                                 continue;
4412                         if (parent == back->parent)
4413                                 return back;
4414                 } else {
4415                         if (node->full_backref)
4416                                 continue;
4417                         if (back->root == root && back->owner == owner &&
4418                             back->offset == offset) {
4419                                 if (found_ref && node->found_ref &&
4420                                     (back->bytes != bytes ||
4421                                     back->disk_bytenr != disk_bytenr))
4422                                         continue;
4423                                 return back;
4424                         }
4425                 }
4426         }
4427         return NULL;
4428 }
4429
4430 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4431                                                 u64 parent, u64 root,
4432                                                 u64 owner, u64 offset,
4433                                                 u64 max_size)
4434 {
4435         struct data_backref *ref = malloc(sizeof(*ref));
4436
4437         if (!ref)
4438                 return NULL;
4439         memset(&ref->node, 0, sizeof(ref->node));
4440         ref->node.is_data = 1;
4441
4442         if (parent > 0) {
4443                 ref->parent = parent;
4444                 ref->owner = 0;
4445                 ref->offset = 0;
4446                 ref->node.full_backref = 1;
4447         } else {
4448                 ref->root = root;
4449                 ref->owner = owner;
4450                 ref->offset = offset;
4451                 ref->node.full_backref = 0;
4452         }
4453         ref->bytes = max_size;
4454         ref->found_ref = 0;
4455         ref->num_refs = 0;
4456         list_add_tail(&ref->node.list, &rec->backrefs);
4457         if (max_size > rec->max_size)
4458                 rec->max_size = max_size;
4459         return ref;
4460 }
4461
4462 /* Check if the type of extent matches with its chunk */
4463 static void check_extent_type(struct extent_record *rec)
4464 {
4465         struct btrfs_block_group_cache *bg_cache;
4466
4467         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4468         if (!bg_cache)
4469                 return;
4470
4471         /* data extent, check chunk directly*/
4472         if (!rec->metadata) {
4473                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4474                         rec->wrong_chunk_type = 1;
4475                 return;
4476         }
4477
4478         /* metadata extent, check the obvious case first */
4479         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4480                                  BTRFS_BLOCK_GROUP_METADATA))) {
4481                 rec->wrong_chunk_type = 1;
4482                 return;
4483         }
4484
4485         /*
4486          * Check SYSTEM extent, as it's also marked as metadata, we can only
4487          * make sure it's a SYSTEM extent by its backref
4488          */
4489         if (!list_empty(&rec->backrefs)) {
4490                 struct extent_backref *node;
4491                 struct tree_backref *tback;
4492                 u64 bg_type;
4493
4494                 node = list_entry(rec->backrefs.next, struct extent_backref,
4495                                   list);
4496                 if (node->is_data) {
4497                         /* tree block shouldn't have data backref */
4498                         rec->wrong_chunk_type = 1;
4499                         return;
4500                 }
4501                 tback = container_of(node, struct tree_backref, node);
4502
4503                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4504                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4505                 else
4506                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4507                 if (!(bg_cache->flags & bg_type))
4508                         rec->wrong_chunk_type = 1;
4509         }
4510 }
4511
4512 /*
4513  * Allocate a new extent record, fill default values from @tmpl and insert int
4514  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4515  * the cache, otherwise it fails.
4516  */
4517 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4518                 struct extent_record *tmpl)
4519 {
4520         struct extent_record *rec;
4521         int ret = 0;
4522
4523         rec = malloc(sizeof(*rec));
4524         if (!rec)
4525                 return -ENOMEM;
4526         rec->start = tmpl->start;
4527         rec->max_size = tmpl->max_size;
4528         rec->nr = max(tmpl->nr, tmpl->max_size);
4529         rec->found_rec = tmpl->found_rec;
4530         rec->content_checked = tmpl->content_checked;
4531         rec->owner_ref_checked = tmpl->owner_ref_checked;
4532         rec->num_duplicates = 0;
4533         rec->metadata = tmpl->metadata;
4534         rec->flag_block_full_backref = -1;
4535         rec->bad_full_backref = 0;
4536         rec->crossing_stripes = 0;
4537         rec->wrong_chunk_type = 0;
4538         rec->is_root = tmpl->is_root;
4539         rec->refs = tmpl->refs;
4540         rec->extent_item_refs = tmpl->extent_item_refs;
4541         rec->parent_generation = tmpl->parent_generation;
4542         INIT_LIST_HEAD(&rec->backrefs);
4543         INIT_LIST_HEAD(&rec->dups);
4544         INIT_LIST_HEAD(&rec->list);
4545         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4546         rec->cache.start = tmpl->start;
4547         rec->cache.size = tmpl->nr;
4548         ret = insert_cache_extent(extent_cache, &rec->cache);
4549         BUG_ON(ret);
4550         bytes_used += tmpl->nr;
4551
4552         if (tmpl->metadata)
4553                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4554                                 rec->max_size);
4555         check_extent_type(rec);
4556         return ret;
4557 }
4558
4559 static int add_extent_rec(struct cache_tree *extent_cache,
4560                           struct btrfs_key *parent_key, u64 parent_gen,
4561                           u64 start, u64 nr, u64 extent_item_refs,
4562                           int is_root, int inc_ref, int set_checked,
4563                           int metadata, int extent_rec, u64 max_size)
4564 {
4565         struct extent_record *rec;
4566         struct cache_extent *cache;
4567         struct extent_record tmpl;
4568         int ret = 0;
4569         int dup = 0;
4570
4571         cache = lookup_cache_extent(extent_cache, start, nr);
4572         if (cache) {
4573                 rec = container_of(cache, struct extent_record, cache);
4574                 if (inc_ref)
4575                         rec->refs++;
4576                 if (rec->nr == 1)
4577                         rec->nr = max(nr, max_size);
4578
4579                 /*
4580                  * We need to make sure to reset nr to whatever the extent
4581                  * record says was the real size, this way we can compare it to
4582                  * the backrefs.
4583                  */
4584                 if (extent_rec) {
4585                         if (start != rec->start || rec->found_rec) {
4586                                 struct extent_record *tmp;
4587
4588                                 dup = 1;
4589                                 if (list_empty(&rec->list))
4590                                         list_add_tail(&rec->list,
4591                                                       &duplicate_extents);
4592
4593                                 /*
4594                                  * We have to do this song and dance in case we
4595                                  * find an extent record that falls inside of
4596                                  * our current extent record but does not have
4597                                  * the same objectid.
4598                                  */
4599                                 tmp = malloc(sizeof(*tmp));
4600                                 if (!tmp)
4601                                         return -ENOMEM;
4602                                 tmp->start = start;
4603                                 tmp->max_size = max_size;
4604                                 tmp->nr = nr;
4605                                 tmp->found_rec = 1;
4606                                 tmp->metadata = metadata;
4607                                 tmp->extent_item_refs = extent_item_refs;
4608                                 INIT_LIST_HEAD(&tmp->list);
4609                                 list_add_tail(&tmp->list, &rec->dups);
4610                                 rec->num_duplicates++;
4611                         } else {
4612                                 rec->nr = nr;
4613                                 rec->found_rec = 1;
4614                         }
4615                 }
4616
4617                 if (extent_item_refs && !dup) {
4618                         if (rec->extent_item_refs) {
4619                                 fprintf(stderr, "block %llu rec "
4620                                         "extent_item_refs %llu, passed %llu\n",
4621                                         (unsigned long long)start,
4622                                         (unsigned long long)
4623                                                         rec->extent_item_refs,
4624                                         (unsigned long long)extent_item_refs);
4625                         }
4626                         rec->extent_item_refs = extent_item_refs;
4627                 }
4628                 if (is_root)
4629                         rec->is_root = 1;
4630                 if (set_checked) {
4631                         rec->content_checked = 1;
4632                         rec->owner_ref_checked = 1;
4633                 }
4634
4635                 if (parent_key)
4636                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4637                 if (parent_gen)
4638                         rec->parent_generation = parent_gen;
4639
4640                 if (rec->max_size < max_size)
4641                         rec->max_size = max_size;
4642
4643                 /*
4644                  * A metadata extent can't cross stripe_len boundary, otherwise
4645                  * kernel scrub won't be able to handle it.
4646                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4647                  * it.
4648                  */
4649                 if (metadata)
4650                         rec->crossing_stripes = check_crossing_stripes(
4651                                         rec->start, rec->max_size);
4652                 check_extent_type(rec);
4653                 maybe_free_extent_rec(extent_cache, rec);
4654                 return ret;
4655         }
4656
4657         memset(&tmpl, 0, sizeof(tmpl));
4658
4659         if (parent_key)
4660                 btrfs_cpu_key_to_disk(&tmpl.parent_key, parent_key);
4661         tmpl.parent_generation = parent_gen;
4662         tmpl.start = start;
4663         tmpl.nr = nr;
4664         tmpl.extent_item_refs = extent_item_refs;
4665         tmpl.is_root = is_root;
4666         tmpl.metadata = metadata;
4667         tmpl.found_rec = extent_rec;
4668         tmpl.max_size = max_size;
4669         tmpl.content_checked = set_checked;
4670         tmpl.owner_ref_checked = set_checked;
4671         tmpl.refs = !!inc_ref;
4672
4673         ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4674
4675         return ret;
4676 }
4677
4678 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4679                             u64 parent, u64 root, int found_ref)
4680 {
4681         struct extent_record *rec;
4682         struct tree_backref *back;
4683         struct cache_extent *cache;
4684
4685         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4686         if (!cache) {
4687                 struct extent_record tmpl;
4688
4689                 memset(&tmpl, 0, sizeof(tmpl));
4690                 tmpl.start = bytenr;
4691                 tmpl.nr = 1;
4692                 tmpl.metadata = 1;
4693
4694                 add_extent_rec_nolookup(extent_cache, &tmpl);
4695
4696                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4697                 if (!cache)
4698                         abort();
4699         }
4700
4701         rec = container_of(cache, struct extent_record, cache);
4702         if (rec->start != bytenr) {
4703                 abort();
4704         }
4705
4706         back = find_tree_backref(rec, parent, root);
4707         if (!back) {
4708                 back = alloc_tree_backref(rec, parent, root);
4709                 BUG_ON(!back);
4710         }
4711
4712         if (found_ref) {
4713                 if (back->node.found_ref) {
4714                         fprintf(stderr, "Extent back ref already exists "
4715                                 "for %llu parent %llu root %llu \n",
4716                                 (unsigned long long)bytenr,
4717                                 (unsigned long long)parent,
4718                                 (unsigned long long)root);
4719                 }
4720                 back->node.found_ref = 1;
4721         } else {
4722                 if (back->node.found_extent_tree) {
4723                         fprintf(stderr, "Extent back ref already exists "
4724                                 "for %llu parent %llu root %llu \n",
4725                                 (unsigned long long)bytenr,
4726                                 (unsigned long long)parent,
4727                                 (unsigned long long)root);
4728                 }
4729                 back->node.found_extent_tree = 1;
4730         }
4731         check_extent_type(rec);
4732         maybe_free_extent_rec(extent_cache, rec);
4733         return 0;
4734 }
4735
4736 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4737                             u64 parent, u64 root, u64 owner, u64 offset,
4738                             u32 num_refs, int found_ref, u64 max_size)
4739 {
4740         struct extent_record *rec;
4741         struct data_backref *back;
4742         struct cache_extent *cache;
4743
4744         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4745         if (!cache) {
4746                 struct extent_record tmpl;
4747
4748                 memset(&tmpl, 0, sizeof(tmpl));
4749                 tmpl.start = bytenr;
4750                 tmpl.nr = 1;
4751                 tmpl.max_size = max_size;
4752
4753                 add_extent_rec_nolookup(extent_cache, &tmpl);
4754
4755                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4756                 if (!cache)
4757                         abort();
4758         }
4759
4760         rec = container_of(cache, struct extent_record, cache);
4761         if (rec->max_size < max_size)
4762                 rec->max_size = max_size;
4763
4764         /*
4765          * If found_ref is set then max_size is the real size and must match the
4766          * existing refs.  So if we have already found a ref then we need to
4767          * make sure that this ref matches the existing one, otherwise we need
4768          * to add a new backref so we can notice that the backrefs don't match
4769          * and we need to figure out who is telling the truth.  This is to
4770          * account for that awful fsync bug I introduced where we'd end up with
4771          * a btrfs_file_extent_item that would have its length include multiple
4772          * prealloc extents or point inside of a prealloc extent.
4773          */
4774         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4775                                  bytenr, max_size);
4776         if (!back) {
4777                 back = alloc_data_backref(rec, parent, root, owner, offset,
4778                                           max_size);
4779                 BUG_ON(!back);
4780         }
4781
4782         if (found_ref) {
4783                 BUG_ON(num_refs != 1);
4784                 if (back->node.found_ref)
4785                         BUG_ON(back->bytes != max_size);
4786                 back->node.found_ref = 1;
4787                 back->found_ref += 1;
4788                 back->bytes = max_size;
4789                 back->disk_bytenr = bytenr;
4790                 rec->refs += 1;
4791                 rec->content_checked = 1;
4792                 rec->owner_ref_checked = 1;
4793         } else {
4794                 if (back->node.found_extent_tree) {
4795                         fprintf(stderr, "Extent back ref already exists "
4796                                 "for %llu parent %llu root %llu "
4797                                 "owner %llu offset %llu num_refs %lu\n",
4798                                 (unsigned long long)bytenr,
4799                                 (unsigned long long)parent,
4800                                 (unsigned long long)root,
4801                                 (unsigned long long)owner,
4802                                 (unsigned long long)offset,
4803                                 (unsigned long)num_refs);
4804                 }
4805                 back->num_refs = num_refs;
4806                 back->node.found_extent_tree = 1;
4807         }
4808         maybe_free_extent_rec(extent_cache, rec);
4809         return 0;
4810 }
4811
4812 static int add_pending(struct cache_tree *pending,
4813                        struct cache_tree *seen, u64 bytenr, u32 size)
4814 {
4815         int ret;
4816         ret = add_cache_extent(seen, bytenr, size);
4817         if (ret)
4818                 return ret;
4819         add_cache_extent(pending, bytenr, size);
4820         return 0;
4821 }
4822
4823 static int pick_next_pending(struct cache_tree *pending,
4824                         struct cache_tree *reada,
4825                         struct cache_tree *nodes,
4826                         u64 last, struct block_info *bits, int bits_nr,
4827                         int *reada_bits)
4828 {
4829         unsigned long node_start = last;
4830         struct cache_extent *cache;
4831         int ret;
4832
4833         cache = search_cache_extent(reada, 0);
4834         if (cache) {
4835                 bits[0].start = cache->start;
4836                 bits[0].size = cache->size;
4837                 *reada_bits = 1;
4838                 return 1;
4839         }
4840         *reada_bits = 0;
4841         if (node_start > 32768)
4842                 node_start -= 32768;
4843
4844         cache = search_cache_extent(nodes, node_start);
4845         if (!cache)
4846                 cache = search_cache_extent(nodes, 0);
4847
4848         if (!cache) {
4849                  cache = search_cache_extent(pending, 0);
4850                  if (!cache)
4851                          return 0;
4852                  ret = 0;
4853                  do {
4854                          bits[ret].start = cache->start;
4855                          bits[ret].size = cache->size;
4856                          cache = next_cache_extent(cache);
4857                          ret++;
4858                  } while (cache && ret < bits_nr);
4859                  return ret;
4860         }
4861
4862         ret = 0;
4863         do {
4864                 bits[ret].start = cache->start;
4865                 bits[ret].size = cache->size;
4866                 cache = next_cache_extent(cache);
4867                 ret++;
4868         } while (cache && ret < bits_nr);
4869
4870         if (bits_nr - ret > 8) {
4871                 u64 lookup = bits[0].start + bits[0].size;
4872                 struct cache_extent *next;
4873                 next = search_cache_extent(pending, lookup);
4874                 while(next) {
4875                         if (next->start - lookup > 32768)
4876                                 break;
4877                         bits[ret].start = next->start;
4878                         bits[ret].size = next->size;
4879                         lookup = next->start + next->size;
4880                         ret++;
4881                         if (ret == bits_nr)
4882                                 break;
4883                         next = next_cache_extent(next);
4884                         if (!next)
4885                                 break;
4886                 }
4887         }
4888         return ret;
4889 }
4890
4891 static void free_chunk_record(struct cache_extent *cache)
4892 {
4893         struct chunk_record *rec;
4894
4895         rec = container_of(cache, struct chunk_record, cache);
4896         list_del_init(&rec->list);
4897         list_del_init(&rec->dextents);
4898         free(rec);
4899 }
4900
4901 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4902 {
4903         cache_tree_free_extents(chunk_cache, free_chunk_record);
4904 }
4905
4906 static void free_device_record(struct rb_node *node)
4907 {
4908         struct device_record *rec;
4909
4910         rec = container_of(node, struct device_record, node);
4911         free(rec);
4912 }
4913
4914 FREE_RB_BASED_TREE(device_cache, free_device_record);
4915
4916 int insert_block_group_record(struct block_group_tree *tree,
4917                               struct block_group_record *bg_rec)
4918 {
4919         int ret;
4920
4921         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4922         if (ret)
4923                 return ret;
4924
4925         list_add_tail(&bg_rec->list, &tree->block_groups);
4926         return 0;
4927 }
4928
4929 static void free_block_group_record(struct cache_extent *cache)
4930 {
4931         struct block_group_record *rec;
4932
4933         rec = container_of(cache, struct block_group_record, cache);
4934         list_del_init(&rec->list);
4935         free(rec);
4936 }
4937
4938 void free_block_group_tree(struct block_group_tree *tree)
4939 {
4940         cache_tree_free_extents(&tree->tree, free_block_group_record);
4941 }
4942
4943 int insert_device_extent_record(struct device_extent_tree *tree,
4944                                 struct device_extent_record *de_rec)
4945 {
4946         int ret;
4947
4948         /*
4949          * Device extent is a bit different from the other extents, because
4950          * the extents which belong to the different devices may have the
4951          * same start and size, so we need use the special extent cache
4952          * search/insert functions.
4953          */
4954         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4955         if (ret)
4956                 return ret;
4957
4958         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4959         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4960         return 0;
4961 }
4962
4963 static void free_device_extent_record(struct cache_extent *cache)
4964 {
4965         struct device_extent_record *rec;
4966
4967         rec = container_of(cache, struct device_extent_record, cache);
4968         if (!list_empty(&rec->chunk_list))
4969                 list_del_init(&rec->chunk_list);
4970         if (!list_empty(&rec->device_list))
4971                 list_del_init(&rec->device_list);
4972         free(rec);
4973 }
4974
4975 void free_device_extent_tree(struct device_extent_tree *tree)
4976 {
4977         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4978 }
4979
4980 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4981 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4982                                  struct extent_buffer *leaf, int slot)
4983 {
4984         struct btrfs_extent_ref_v0 *ref0;
4985         struct btrfs_key key;
4986
4987         btrfs_item_key_to_cpu(leaf, &key, slot);
4988         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4989         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4990                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4991         } else {
4992                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4993                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4994         }
4995         return 0;
4996 }
4997 #endif
4998
4999 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5000                                             struct btrfs_key *key,
5001                                             int slot)
5002 {
5003         struct btrfs_chunk *ptr;
5004         struct chunk_record *rec;
5005         int num_stripes, i;
5006
5007         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5008         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5009
5010         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5011         if (!rec) {
5012                 fprintf(stderr, "memory allocation failed\n");
5013                 exit(-1);
5014         }
5015
5016         INIT_LIST_HEAD(&rec->list);
5017         INIT_LIST_HEAD(&rec->dextents);
5018         rec->bg_rec = NULL;
5019
5020         rec->cache.start = key->offset;
5021         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5022
5023         rec->generation = btrfs_header_generation(leaf);
5024
5025         rec->objectid = key->objectid;
5026         rec->type = key->type;
5027         rec->offset = key->offset;
5028
5029         rec->length = rec->cache.size;
5030         rec->owner = btrfs_chunk_owner(leaf, ptr);
5031         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5032         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5033         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5034         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5035         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5036         rec->num_stripes = num_stripes;
5037         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5038
5039         for (i = 0; i < rec->num_stripes; ++i) {
5040                 rec->stripes[i].devid =
5041                         btrfs_stripe_devid_nr(leaf, ptr, i);
5042                 rec->stripes[i].offset =
5043                         btrfs_stripe_offset_nr(leaf, ptr, i);
5044                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5045                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5046                                 BTRFS_UUID_SIZE);
5047         }
5048
5049         return rec;
5050 }
5051
5052 static int process_chunk_item(struct cache_tree *chunk_cache,
5053                               struct btrfs_key *key, struct extent_buffer *eb,
5054                               int slot)
5055 {
5056         struct chunk_record *rec;
5057         int ret = 0;
5058
5059         rec = btrfs_new_chunk_record(eb, key, slot);
5060         ret = insert_cache_extent(chunk_cache, &rec->cache);
5061         if (ret) {
5062                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5063                         rec->offset, rec->length);
5064                 free(rec);
5065         }
5066
5067         return ret;
5068 }
5069
5070 static int process_device_item(struct rb_root *dev_cache,
5071                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5072 {
5073         struct btrfs_dev_item *ptr;
5074         struct device_record *rec;
5075         int ret = 0;
5076
5077         ptr = btrfs_item_ptr(eb,
5078                 slot, struct btrfs_dev_item);
5079
5080         rec = malloc(sizeof(*rec));
5081         if (!rec) {
5082                 fprintf(stderr, "memory allocation failed\n");
5083                 return -ENOMEM;
5084         }
5085
5086         rec->devid = key->offset;
5087         rec->generation = btrfs_header_generation(eb);
5088
5089         rec->objectid = key->objectid;
5090         rec->type = key->type;
5091         rec->offset = key->offset;
5092
5093         rec->devid = btrfs_device_id(eb, ptr);
5094         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5095         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5096
5097         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5098         if (ret) {
5099                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5100                 free(rec);
5101         }
5102
5103         return ret;
5104 }
5105
5106 struct block_group_record *
5107 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5108                              int slot)
5109 {
5110         struct btrfs_block_group_item *ptr;
5111         struct block_group_record *rec;
5112
5113         rec = calloc(1, sizeof(*rec));
5114         if (!rec) {
5115                 fprintf(stderr, "memory allocation failed\n");
5116                 exit(-1);
5117         }
5118
5119         rec->cache.start = key->objectid;
5120         rec->cache.size = key->offset;
5121
5122         rec->generation = btrfs_header_generation(leaf);
5123
5124         rec->objectid = key->objectid;
5125         rec->type = key->type;
5126         rec->offset = key->offset;
5127
5128         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5129         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5130
5131         INIT_LIST_HEAD(&rec->list);
5132
5133         return rec;
5134 }
5135
5136 static int process_block_group_item(struct block_group_tree *block_group_cache,
5137                                     struct btrfs_key *key,
5138                                     struct extent_buffer *eb, int slot)
5139 {
5140         struct block_group_record *rec;
5141         int ret = 0;
5142
5143         rec = btrfs_new_block_group_record(eb, key, slot);
5144         ret = insert_block_group_record(block_group_cache, rec);
5145         if (ret) {
5146                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5147                         rec->objectid, rec->offset);
5148                 free(rec);
5149         }
5150
5151         return ret;
5152 }
5153
5154 struct device_extent_record *
5155 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5156                                struct btrfs_key *key, int slot)
5157 {
5158         struct device_extent_record *rec;
5159         struct btrfs_dev_extent *ptr;
5160
5161         rec = calloc(1, sizeof(*rec));
5162         if (!rec) {
5163                 fprintf(stderr, "memory allocation failed\n");
5164                 exit(-1);
5165         }
5166
5167         rec->cache.objectid = key->objectid;
5168         rec->cache.start = key->offset;
5169
5170         rec->generation = btrfs_header_generation(leaf);
5171
5172         rec->objectid = key->objectid;
5173         rec->type = key->type;
5174         rec->offset = key->offset;
5175
5176         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5177         rec->chunk_objecteid =
5178                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5179         rec->chunk_offset =
5180                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5181         rec->length = btrfs_dev_extent_length(leaf, ptr);
5182         rec->cache.size = rec->length;
5183
5184         INIT_LIST_HEAD(&rec->chunk_list);
5185         INIT_LIST_HEAD(&rec->device_list);
5186
5187         return rec;
5188 }
5189
5190 static int
5191 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5192                            struct btrfs_key *key, struct extent_buffer *eb,
5193                            int slot)
5194 {
5195         struct device_extent_record *rec;
5196         int ret;
5197
5198         rec = btrfs_new_device_extent_record(eb, key, slot);
5199         ret = insert_device_extent_record(dev_extent_cache, rec);
5200         if (ret) {
5201                 fprintf(stderr,
5202                         "Device extent[%llu, %llu, %llu] existed.\n",
5203                         rec->objectid, rec->offset, rec->length);
5204                 free(rec);
5205         }
5206
5207         return ret;
5208 }
5209
5210 static int process_extent_item(struct btrfs_root *root,
5211                                struct cache_tree *extent_cache,
5212                                struct extent_buffer *eb, int slot)
5213 {
5214         struct btrfs_extent_item *ei;
5215         struct btrfs_extent_inline_ref *iref;
5216         struct btrfs_extent_data_ref *dref;
5217         struct btrfs_shared_data_ref *sref;
5218         struct btrfs_key key;
5219         unsigned long end;
5220         unsigned long ptr;
5221         int type;
5222         u32 item_size = btrfs_item_size_nr(eb, slot);
5223         u64 refs = 0;
5224         u64 offset;
5225         u64 num_bytes;
5226         int metadata = 0;
5227
5228         btrfs_item_key_to_cpu(eb, &key, slot);
5229
5230         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5231                 metadata = 1;
5232                 num_bytes = root->nodesize;
5233         } else {
5234                 num_bytes = key.offset;
5235         }
5236
5237         if (item_size < sizeof(*ei)) {
5238 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5239                 struct btrfs_extent_item_v0 *ei0;
5240                 BUG_ON(item_size != sizeof(*ei0));
5241                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5242                 refs = btrfs_extent_refs_v0(eb, ei0);
5243 #else
5244                 BUG();
5245 #endif
5246                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5247                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5248                                       num_bytes);
5249         }
5250
5251         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5252         refs = btrfs_extent_refs(eb, ei);
5253         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5254                 metadata = 1;
5255         else
5256                 metadata = 0;
5257
5258         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5259                        refs, 0, 0, 0, metadata, 1, num_bytes);
5260
5261         ptr = (unsigned long)(ei + 1);
5262         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5263             key.type == BTRFS_EXTENT_ITEM_KEY)
5264                 ptr += sizeof(struct btrfs_tree_block_info);
5265
5266         end = (unsigned long)ei + item_size;
5267         while (ptr < end) {
5268                 iref = (struct btrfs_extent_inline_ref *)ptr;
5269                 type = btrfs_extent_inline_ref_type(eb, iref);
5270                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5271                 switch (type) {
5272                 case BTRFS_TREE_BLOCK_REF_KEY:
5273                         add_tree_backref(extent_cache, key.objectid,
5274                                          0, offset, 0);
5275                         break;
5276                 case BTRFS_SHARED_BLOCK_REF_KEY:
5277                         add_tree_backref(extent_cache, key.objectid,
5278                                          offset, 0, 0);
5279                         break;
5280                 case BTRFS_EXTENT_DATA_REF_KEY:
5281                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5282                         add_data_backref(extent_cache, key.objectid, 0,
5283                                         btrfs_extent_data_ref_root(eb, dref),
5284                                         btrfs_extent_data_ref_objectid(eb,
5285                                                                        dref),
5286                                         btrfs_extent_data_ref_offset(eb, dref),
5287                                         btrfs_extent_data_ref_count(eb, dref),
5288                                         0, num_bytes);
5289                         break;
5290                 case BTRFS_SHARED_DATA_REF_KEY:
5291                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5292                         add_data_backref(extent_cache, key.objectid, offset,
5293                                         0, 0, 0,
5294                                         btrfs_shared_data_ref_count(eb, sref),
5295                                         0, num_bytes);
5296                         break;
5297                 default:
5298                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5299                                 key.objectid, key.type, num_bytes);
5300                         goto out;
5301                 }
5302                 ptr += btrfs_extent_inline_ref_size(type);
5303         }
5304         WARN_ON(ptr > end);
5305 out:
5306         return 0;
5307 }
5308
5309 static int check_cache_range(struct btrfs_root *root,
5310                              struct btrfs_block_group_cache *cache,
5311                              u64 offset, u64 bytes)
5312 {
5313         struct btrfs_free_space *entry;
5314         u64 *logical;
5315         u64 bytenr;
5316         int stripe_len;
5317         int i, nr, ret;
5318
5319         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5320                 bytenr = btrfs_sb_offset(i);
5321                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5322                                        cache->key.objectid, bytenr, 0,
5323                                        &logical, &nr, &stripe_len);
5324                 if (ret)
5325                         return ret;
5326
5327                 while (nr--) {
5328                         if (logical[nr] + stripe_len <= offset)
5329                                 continue;
5330                         if (offset + bytes <= logical[nr])
5331                                 continue;
5332                         if (logical[nr] == offset) {
5333                                 if (stripe_len >= bytes) {
5334                                         kfree(logical);
5335                                         return 0;
5336                                 }
5337                                 bytes -= stripe_len;
5338                                 offset += stripe_len;
5339                         } else if (logical[nr] < offset) {
5340                                 if (logical[nr] + stripe_len >=
5341                                     offset + bytes) {
5342                                         kfree(logical);
5343                                         return 0;
5344                                 }
5345                                 bytes = (offset + bytes) -
5346                                         (logical[nr] + stripe_len);
5347                                 offset = logical[nr] + stripe_len;
5348                         } else {
5349                                 /*
5350                                  * Could be tricky, the super may land in the
5351                                  * middle of the area we're checking.  First
5352                                  * check the easiest case, it's at the end.
5353                                  */
5354                                 if (logical[nr] + stripe_len >=
5355                                     bytes + offset) {
5356                                         bytes = logical[nr] - offset;
5357                                         continue;
5358                                 }
5359
5360                                 /* Check the left side */
5361                                 ret = check_cache_range(root, cache,
5362                                                         offset,
5363                                                         logical[nr] - offset);
5364                                 if (ret) {
5365                                         kfree(logical);
5366                                         return ret;
5367                                 }
5368
5369                                 /* Now we continue with the right side */
5370                                 bytes = (offset + bytes) -
5371                                         (logical[nr] + stripe_len);
5372                                 offset = logical[nr] + stripe_len;
5373                         }
5374                 }
5375
5376                 kfree(logical);
5377         }
5378
5379         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5380         if (!entry) {
5381                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5382                         offset, offset+bytes);
5383                 return -EINVAL;
5384         }
5385
5386         if (entry->offset != offset) {
5387                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5388                         entry->offset);
5389                 return -EINVAL;
5390         }
5391
5392         if (entry->bytes != bytes) {
5393                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5394                         bytes, entry->bytes, offset);
5395                 return -EINVAL;
5396         }
5397
5398         unlink_free_space(cache->free_space_ctl, entry);
5399         free(entry);
5400         return 0;
5401 }
5402
5403 static int verify_space_cache(struct btrfs_root *root,
5404                               struct btrfs_block_group_cache *cache)
5405 {
5406         struct btrfs_path *path;
5407         struct extent_buffer *leaf;
5408         struct btrfs_key key;
5409         u64 last;
5410         int ret = 0;
5411
5412         path = btrfs_alloc_path();
5413         if (!path)
5414                 return -ENOMEM;
5415
5416         root = root->fs_info->extent_root;
5417
5418         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5419
5420         key.objectid = last;
5421         key.offset = 0;
5422         key.type = BTRFS_EXTENT_ITEM_KEY;
5423
5424         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5425         if (ret < 0)
5426                 goto out;
5427         ret = 0;
5428         while (1) {
5429                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5430                         ret = btrfs_next_leaf(root, path);
5431                         if (ret < 0)
5432                                 goto out;
5433                         if (ret > 0) {
5434                                 ret = 0;
5435                                 break;
5436                         }
5437                 }
5438                 leaf = path->nodes[0];
5439                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5440                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5441                         break;
5442                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5443                     key.type != BTRFS_METADATA_ITEM_KEY) {
5444                         path->slots[0]++;
5445                         continue;
5446                 }
5447
5448                 if (last == key.objectid) {
5449                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5450                                 last = key.objectid + key.offset;
5451                         else
5452                                 last = key.objectid + root->nodesize;
5453                         path->slots[0]++;
5454                         continue;
5455                 }
5456
5457                 ret = check_cache_range(root, cache, last,
5458                                         key.objectid - last);
5459                 if (ret)
5460                         break;
5461                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5462                         last = key.objectid + key.offset;
5463                 else
5464                         last = key.objectid + root->nodesize;
5465                 path->slots[0]++;
5466         }
5467
5468         if (last < cache->key.objectid + cache->key.offset)
5469                 ret = check_cache_range(root, cache, last,
5470                                         cache->key.objectid +
5471                                         cache->key.offset - last);
5472
5473 out:
5474         btrfs_free_path(path);
5475
5476         if (!ret &&
5477             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5478                 fprintf(stderr, "There are still entries left in the space "
5479                         "cache\n");
5480                 ret = -EINVAL;
5481         }
5482
5483         return ret;
5484 }
5485
5486 static int check_space_cache(struct btrfs_root *root)
5487 {
5488         struct btrfs_block_group_cache *cache;
5489         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5490         int ret;
5491         int error = 0;
5492
5493         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5494             btrfs_super_generation(root->fs_info->super_copy) !=
5495             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5496                 printf("cache and super generation don't match, space cache "
5497                        "will be invalidated\n");
5498                 return 0;
5499         }
5500
5501         if (ctx.progress_enabled) {
5502                 ctx.tp = TASK_FREE_SPACE;
5503                 task_start(ctx.info);
5504         }
5505
5506         while (1) {
5507                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5508                 if (!cache)
5509                         break;
5510
5511                 start = cache->key.objectid + cache->key.offset;
5512                 if (!cache->free_space_ctl) {
5513                         if (btrfs_init_free_space_ctl(cache,
5514                                                       root->sectorsize)) {
5515                                 ret = -ENOMEM;
5516                                 break;
5517                         }
5518                 } else {
5519                         btrfs_remove_free_space_cache(cache);
5520                 }
5521
5522                 if (btrfs_fs_compat_ro(root->fs_info,
5523                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5524                         ret = exclude_super_stripes(root, cache);
5525                         if (ret) {
5526                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5527                                         strerror(-ret));
5528                                 error++;
5529                                 continue;
5530                         }
5531                         ret = load_free_space_tree(root->fs_info, cache);
5532                         free_excluded_extents(root, cache);
5533                         if (ret < 0) {
5534                                 fprintf(stderr, "could not load free space tree: %s\n",
5535                                         strerror(-ret));
5536                                 error++;
5537                                 continue;
5538                         }
5539                         error += ret;
5540                 } else {
5541                         ret = load_free_space_cache(root->fs_info, cache);
5542                         if (!ret)
5543                                 continue;
5544                 }
5545
5546                 ret = verify_space_cache(root, cache);
5547                 if (ret) {
5548                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5549                                 cache->key.objectid);
5550                         error++;
5551                 }
5552         }
5553
5554         task_stop(ctx.info);
5555
5556         return error ? -EINVAL : 0;
5557 }
5558
5559 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5560                         u64 num_bytes, unsigned long leaf_offset,
5561                         struct extent_buffer *eb) {
5562
5563         u64 offset = 0;
5564         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5565         char *data;
5566         unsigned long csum_offset;
5567         u32 csum;
5568         u32 csum_expected;
5569         u64 read_len;
5570         u64 data_checked = 0;
5571         u64 tmp;
5572         int ret = 0;
5573         int mirror;
5574         int num_copies;
5575
5576         if (num_bytes % root->sectorsize)
5577                 return -EINVAL;
5578
5579         data = malloc(num_bytes);
5580         if (!data)
5581                 return -ENOMEM;
5582
5583         while (offset < num_bytes) {
5584                 mirror = 0;
5585 again:
5586                 read_len = num_bytes - offset;
5587                 /* read as much space once a time */
5588                 ret = read_extent_data(root, data + offset,
5589                                 bytenr + offset, &read_len, mirror);
5590                 if (ret)
5591                         goto out;
5592                 data_checked = 0;
5593                 /* verify every 4k data's checksum */
5594                 while (data_checked < read_len) {
5595                         csum = ~(u32)0;
5596                         tmp = offset + data_checked;
5597
5598                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5599                                                csum, root->sectorsize);
5600                         btrfs_csum_final(csum, (char *)&csum);
5601
5602                         csum_offset = leaf_offset +
5603                                  tmp / root->sectorsize * csum_size;
5604                         read_extent_buffer(eb, (char *)&csum_expected,
5605                                            csum_offset, csum_size);
5606                         /* try another mirror */
5607                         if (csum != csum_expected) {
5608                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5609                                                 mirror, bytenr + tmp,
5610                                                 csum, csum_expected);
5611                                 num_copies = btrfs_num_copies(
5612                                                 &root->fs_info->mapping_tree,
5613                                                 bytenr, num_bytes);
5614                                 if (mirror < num_copies - 1) {
5615                                         mirror += 1;
5616                                         goto again;
5617                                 }
5618                         }
5619                         data_checked += root->sectorsize;
5620                 }
5621                 offset += read_len;
5622         }
5623 out:
5624         free(data);
5625         return ret;
5626 }
5627
5628 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5629                                u64 num_bytes)
5630 {
5631         struct btrfs_path *path;
5632         struct extent_buffer *leaf;
5633         struct btrfs_key key;
5634         int ret;
5635
5636         path = btrfs_alloc_path();
5637         if (!path) {
5638                 fprintf(stderr, "Error allocing path\n");
5639                 return -ENOMEM;
5640         }
5641
5642         key.objectid = bytenr;
5643         key.type = BTRFS_EXTENT_ITEM_KEY;
5644         key.offset = (u64)-1;
5645
5646 again:
5647         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5648                                 0, 0);
5649         if (ret < 0) {
5650                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5651                 btrfs_free_path(path);
5652                 return ret;
5653         } else if (ret) {
5654                 if (path->slots[0] > 0) {
5655                         path->slots[0]--;
5656                 } else {
5657                         ret = btrfs_prev_leaf(root, path);
5658                         if (ret < 0) {
5659                                 goto out;
5660                         } else if (ret > 0) {
5661                                 ret = 0;
5662                                 goto out;
5663                         }
5664                 }
5665         }
5666
5667         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5668
5669         /*
5670          * Block group items come before extent items if they have the same
5671          * bytenr, so walk back one more just in case.  Dear future traveler,
5672          * first congrats on mastering time travel.  Now if it's not too much
5673          * trouble could you go back to 2006 and tell Chris to make the
5674          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5675          * EXTENT_ITEM_KEY please?
5676          */
5677         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5678                 if (path->slots[0] > 0) {
5679                         path->slots[0]--;
5680                 } else {
5681                         ret = btrfs_prev_leaf(root, path);
5682                         if (ret < 0) {
5683                                 goto out;
5684                         } else if (ret > 0) {
5685                                 ret = 0;
5686                                 goto out;
5687                         }
5688                 }
5689                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5690         }
5691
5692         while (num_bytes) {
5693                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5694                         ret = btrfs_next_leaf(root, path);
5695                         if (ret < 0) {
5696                                 fprintf(stderr, "Error going to next leaf "
5697                                         "%d\n", ret);
5698                                 btrfs_free_path(path);
5699                                 return ret;
5700                         } else if (ret) {
5701                                 break;
5702                         }
5703                 }
5704                 leaf = path->nodes[0];
5705                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5706                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5707                         path->slots[0]++;
5708                         continue;
5709                 }
5710                 if (key.objectid + key.offset < bytenr) {
5711                         path->slots[0]++;
5712                         continue;
5713                 }
5714                 if (key.objectid > bytenr + num_bytes)
5715                         break;
5716
5717                 if (key.objectid == bytenr) {
5718                         if (key.offset >= num_bytes) {
5719                                 num_bytes = 0;
5720                                 break;
5721                         }
5722                         num_bytes -= key.offset;
5723                         bytenr += key.offset;
5724                 } else if (key.objectid < bytenr) {
5725                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5726                                 num_bytes = 0;
5727                                 break;
5728                         }
5729                         num_bytes = (bytenr + num_bytes) -
5730                                 (key.objectid + key.offset);
5731                         bytenr = key.objectid + key.offset;
5732                 } else {
5733                         if (key.objectid + key.offset < bytenr + num_bytes) {
5734                                 u64 new_start = key.objectid + key.offset;
5735                                 u64 new_bytes = bytenr + num_bytes - new_start;
5736
5737                                 /*
5738                                  * Weird case, the extent is in the middle of
5739                                  * our range, we'll have to search one side
5740                                  * and then the other.  Not sure if this happens
5741                                  * in real life, but no harm in coding it up
5742                                  * anyway just in case.
5743                                  */
5744                                 btrfs_release_path(path);
5745                                 ret = check_extent_exists(root, new_start,
5746                                                           new_bytes);
5747                                 if (ret) {
5748                                         fprintf(stderr, "Right section didn't "
5749                                                 "have a record\n");
5750                                         break;
5751                                 }
5752                                 num_bytes = key.objectid - bytenr;
5753                                 goto again;
5754                         }
5755                         num_bytes = key.objectid - bytenr;
5756                 }
5757                 path->slots[0]++;
5758         }
5759         ret = 0;
5760
5761 out:
5762         if (num_bytes && !ret) {
5763                 fprintf(stderr, "There are no extents for csum range "
5764                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5765                 ret = 1;
5766         }
5767
5768         btrfs_free_path(path);
5769         return ret;
5770 }
5771
5772 static int check_csums(struct btrfs_root *root)
5773 {
5774         struct btrfs_path *path;
5775         struct extent_buffer *leaf;
5776         struct btrfs_key key;
5777         u64 offset = 0, num_bytes = 0;
5778         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5779         int errors = 0;
5780         int ret;
5781         u64 data_len;
5782         unsigned long leaf_offset;
5783
5784         root = root->fs_info->csum_root;
5785         if (!extent_buffer_uptodate(root->node)) {
5786                 fprintf(stderr, "No valid csum tree found\n");
5787                 return -ENOENT;
5788         }
5789
5790         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5791         key.type = BTRFS_EXTENT_CSUM_KEY;
5792         key.offset = 0;
5793
5794         path = btrfs_alloc_path();
5795         if (!path)
5796                 return -ENOMEM;
5797
5798         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5799         if (ret < 0) {
5800                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5801                 btrfs_free_path(path);
5802                 return ret;
5803         }
5804
5805         if (ret > 0 && path->slots[0])
5806                 path->slots[0]--;
5807         ret = 0;
5808
5809         while (1) {
5810                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5811                         ret = btrfs_next_leaf(root, path);
5812                         if (ret < 0) {
5813                                 fprintf(stderr, "Error going to next leaf "
5814                                         "%d\n", ret);
5815                                 break;
5816                         }
5817                         if (ret)
5818                                 break;
5819                 }
5820                 leaf = path->nodes[0];
5821
5822                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5823                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5824                         path->slots[0]++;
5825                         continue;
5826                 }
5827
5828                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5829                               csum_size) * root->sectorsize;
5830                 if (!check_data_csum)
5831                         goto skip_csum_check;
5832                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5833                 ret = check_extent_csums(root, key.offset, data_len,
5834                                          leaf_offset, leaf);
5835                 if (ret)
5836                         break;
5837 skip_csum_check:
5838                 if (!num_bytes) {
5839                         offset = key.offset;
5840                 } else if (key.offset != offset + num_bytes) {
5841                         ret = check_extent_exists(root, offset, num_bytes);
5842                         if (ret) {
5843                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5844                                         "there is no extent record\n",
5845                                         offset, offset+num_bytes);
5846                                 errors++;
5847                         }
5848                         offset = key.offset;
5849                         num_bytes = 0;
5850                 }
5851                 num_bytes += data_len;
5852                 path->slots[0]++;
5853         }
5854
5855         btrfs_free_path(path);
5856         return errors;
5857 }
5858
5859 static int is_dropped_key(struct btrfs_key *key,
5860                           struct btrfs_key *drop_key) {
5861         if (key->objectid < drop_key->objectid)
5862                 return 1;
5863         else if (key->objectid == drop_key->objectid) {
5864                 if (key->type < drop_key->type)
5865                         return 1;
5866                 else if (key->type == drop_key->type) {
5867                         if (key->offset < drop_key->offset)
5868                                 return 1;
5869                 }
5870         }
5871         return 0;
5872 }
5873
5874 /*
5875  * Here are the rules for FULL_BACKREF.
5876  *
5877  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5878  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5879  *      FULL_BACKREF set.
5880  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5881  *    if it happened after the relocation occurred since we'll have dropped the
5882  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5883  *    have no real way to know for sure.
5884  *
5885  * We process the blocks one root at a time, and we start from the lowest root
5886  * objectid and go to the highest.  So we can just lookup the owner backref for
5887  * the record and if we don't find it then we know it doesn't exist and we have
5888  * a FULL BACKREF.
5889  *
5890  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5891  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5892  * be set or not and then we can check later once we've gathered all the refs.
5893  */
5894 static int calc_extent_flag(struct btrfs_root *root,
5895                            struct cache_tree *extent_cache,
5896                            struct extent_buffer *buf,
5897                            struct root_item_record *ri,
5898                            u64 *flags)
5899 {
5900         struct extent_record *rec;
5901         struct cache_extent *cache;
5902         struct tree_backref *tback;
5903         u64 owner = 0;
5904
5905         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5906         /* we have added this extent before */
5907         BUG_ON(!cache);
5908         rec = container_of(cache, struct extent_record, cache);
5909
5910         /*
5911          * Except file/reloc tree, we can not have
5912          * FULL BACKREF MODE
5913          */
5914         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5915                 goto normal;
5916         /*
5917          * root node
5918          */
5919         if (buf->start == ri->bytenr)
5920                 goto normal;
5921
5922         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5923                 goto full_backref;
5924
5925         owner = btrfs_header_owner(buf);
5926         if (owner == ri->objectid)
5927                 goto normal;
5928
5929         tback = find_tree_backref(rec, 0, owner);
5930         if (!tback)
5931                 goto full_backref;
5932 normal:
5933         *flags = 0;
5934         if (rec->flag_block_full_backref != -1 &&
5935             rec->flag_block_full_backref != 0)
5936                 rec->bad_full_backref = 1;
5937         return 0;
5938 full_backref:
5939         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5940         if (rec->flag_block_full_backref != -1 &&
5941             rec->flag_block_full_backref != 1)
5942                 rec->bad_full_backref = 1;
5943         return 0;
5944 }
5945
5946 static int run_next_block(struct btrfs_root *root,
5947                           struct block_info *bits,
5948                           int bits_nr,
5949                           u64 *last,
5950                           struct cache_tree *pending,
5951                           struct cache_tree *seen,
5952                           struct cache_tree *reada,
5953                           struct cache_tree *nodes,
5954                           struct cache_tree *extent_cache,
5955                           struct cache_tree *chunk_cache,
5956                           struct rb_root *dev_cache,
5957                           struct block_group_tree *block_group_cache,
5958                           struct device_extent_tree *dev_extent_cache,
5959                           struct root_item_record *ri)
5960 {
5961         struct extent_buffer *buf;
5962         struct extent_record *rec = NULL;
5963         u64 bytenr;
5964         u32 size;
5965         u64 parent;
5966         u64 owner;
5967         u64 flags;
5968         u64 ptr;
5969         u64 gen = 0;
5970         int ret = 0;
5971         int i;
5972         int nritems;
5973         struct btrfs_key key;
5974         struct cache_extent *cache;
5975         int reada_bits;
5976
5977         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5978                                     bits_nr, &reada_bits);
5979         if (nritems == 0)
5980                 return 1;
5981
5982         if (!reada_bits) {
5983                 for(i = 0; i < nritems; i++) {
5984                         ret = add_cache_extent(reada, bits[i].start,
5985                                                bits[i].size);
5986                         if (ret == -EEXIST)
5987                                 continue;
5988
5989                         /* fixme, get the parent transid */
5990                         readahead_tree_block(root, bits[i].start,
5991                                              bits[i].size, 0);
5992                 }
5993         }
5994         *last = bits[0].start;
5995         bytenr = bits[0].start;
5996         size = bits[0].size;
5997
5998         cache = lookup_cache_extent(pending, bytenr, size);
5999         if (cache) {
6000                 remove_cache_extent(pending, cache);
6001                 free(cache);
6002         }
6003         cache = lookup_cache_extent(reada, bytenr, size);
6004         if (cache) {
6005                 remove_cache_extent(reada, cache);
6006                 free(cache);
6007         }
6008         cache = lookup_cache_extent(nodes, bytenr, size);
6009         if (cache) {
6010                 remove_cache_extent(nodes, cache);
6011                 free(cache);
6012         }
6013         cache = lookup_cache_extent(extent_cache, bytenr, size);
6014         if (cache) {
6015                 rec = container_of(cache, struct extent_record, cache);
6016                 gen = rec->parent_generation;
6017         }
6018
6019         /* fixme, get the real parent transid */
6020         buf = read_tree_block(root, bytenr, size, gen);
6021         if (!extent_buffer_uptodate(buf)) {
6022                 record_bad_block_io(root->fs_info,
6023                                     extent_cache, bytenr, size);
6024                 goto out;
6025         }
6026
6027         nritems = btrfs_header_nritems(buf);
6028
6029         flags = 0;
6030         if (!init_extent_tree) {
6031                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6032                                        btrfs_header_level(buf), 1, NULL,
6033                                        &flags);
6034                 if (ret < 0) {
6035                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6036                         if (ret < 0) {
6037                                 fprintf(stderr, "Couldn't calc extent flags\n");
6038                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6039                         }
6040                 }
6041         } else {
6042                 flags = 0;
6043                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6044                 if (ret < 0) {
6045                         fprintf(stderr, "Couldn't calc extent flags\n");
6046                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6047                 }
6048         }
6049
6050         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6051                 if (ri != NULL &&
6052                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6053                     ri->objectid == btrfs_header_owner(buf)) {
6054                         /*
6055                          * Ok we got to this block from it's original owner and
6056                          * we have FULL_BACKREF set.  Relocation can leave
6057                          * converted blocks over so this is altogether possible,
6058                          * however it's not possible if the generation > the
6059                          * last snapshot, so check for this case.
6060                          */
6061                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6062                             btrfs_header_generation(buf) > ri->last_snapshot) {
6063                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6064                                 rec->bad_full_backref = 1;
6065                         }
6066                 }
6067         } else {
6068                 if (ri != NULL &&
6069                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6070                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6071                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6072                         rec->bad_full_backref = 1;
6073                 }
6074         }
6075
6076         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6077                 rec->flag_block_full_backref = 1;
6078                 parent = bytenr;
6079                 owner = 0;
6080         } else {
6081                 rec->flag_block_full_backref = 0;
6082                 parent = 0;
6083                 owner = btrfs_header_owner(buf);
6084         }
6085
6086         ret = check_block(root, extent_cache, buf, flags);
6087         if (ret)
6088                 goto out;
6089
6090         if (btrfs_is_leaf(buf)) {
6091                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6092                 for (i = 0; i < nritems; i++) {
6093                         struct btrfs_file_extent_item *fi;
6094                         btrfs_item_key_to_cpu(buf, &key, i);
6095                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6096                                 process_extent_item(root, extent_cache, buf,
6097                                                     i);
6098                                 continue;
6099                         }
6100                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6101                                 process_extent_item(root, extent_cache, buf,
6102                                                     i);
6103                                 continue;
6104                         }
6105                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6106                                 total_csum_bytes +=
6107                                         btrfs_item_size_nr(buf, i);
6108                                 continue;
6109                         }
6110                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6111                                 process_chunk_item(chunk_cache, &key, buf, i);
6112                                 continue;
6113                         }
6114                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6115                                 process_device_item(dev_cache, &key, buf, i);
6116                                 continue;
6117                         }
6118                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6119                                 process_block_group_item(block_group_cache,
6120                                         &key, buf, i);
6121                                 continue;
6122                         }
6123                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6124                                 process_device_extent_item(dev_extent_cache,
6125                                         &key, buf, i);
6126                                 continue;
6127
6128                         }
6129                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6130 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6131                                 process_extent_ref_v0(extent_cache, buf, i);
6132 #else
6133                                 BUG();
6134 #endif
6135                                 continue;
6136                         }
6137
6138                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6139                                 add_tree_backref(extent_cache, key.objectid, 0,
6140                                                  key.offset, 0);
6141                                 continue;
6142                         }
6143                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6144                                 add_tree_backref(extent_cache, key.objectid,
6145                                                  key.offset, 0, 0);
6146                                 continue;
6147                         }
6148                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6149                                 struct btrfs_extent_data_ref *ref;
6150                                 ref = btrfs_item_ptr(buf, i,
6151                                                 struct btrfs_extent_data_ref);
6152                                 add_data_backref(extent_cache,
6153                                         key.objectid, 0,
6154                                         btrfs_extent_data_ref_root(buf, ref),
6155                                         btrfs_extent_data_ref_objectid(buf,
6156                                                                        ref),
6157                                         btrfs_extent_data_ref_offset(buf, ref),
6158                                         btrfs_extent_data_ref_count(buf, ref),
6159                                         0, root->sectorsize);
6160                                 continue;
6161                         }
6162                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6163                                 struct btrfs_shared_data_ref *ref;
6164                                 ref = btrfs_item_ptr(buf, i,
6165                                                 struct btrfs_shared_data_ref);
6166                                 add_data_backref(extent_cache,
6167                                         key.objectid, key.offset, 0, 0, 0,
6168                                         btrfs_shared_data_ref_count(buf, ref),
6169                                         0, root->sectorsize);
6170                                 continue;
6171                         }
6172                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6173                                 struct bad_item *bad;
6174
6175                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6176                                         continue;
6177                                 if (!owner)
6178                                         continue;
6179                                 bad = malloc(sizeof(struct bad_item));
6180                                 if (!bad)
6181                                         continue;
6182                                 INIT_LIST_HEAD(&bad->list);
6183                                 memcpy(&bad->key, &key,
6184                                        sizeof(struct btrfs_key));
6185                                 bad->root_id = owner;
6186                                 list_add_tail(&bad->list, &delete_items);
6187                                 continue;
6188                         }
6189                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6190                                 continue;
6191                         fi = btrfs_item_ptr(buf, i,
6192                                             struct btrfs_file_extent_item);
6193                         if (btrfs_file_extent_type(buf, fi) ==
6194                             BTRFS_FILE_EXTENT_INLINE)
6195                                 continue;
6196                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6197                                 continue;
6198
6199                         data_bytes_allocated +=
6200                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6201                         if (data_bytes_allocated < root->sectorsize) {
6202                                 abort();
6203                         }
6204                         data_bytes_referenced +=
6205                                 btrfs_file_extent_num_bytes(buf, fi);
6206                         add_data_backref(extent_cache,
6207                                 btrfs_file_extent_disk_bytenr(buf, fi),
6208                                 parent, owner, key.objectid, key.offset -
6209                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6210                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6211                 }
6212         } else {
6213                 int level;
6214                 struct btrfs_key first_key;
6215
6216                 first_key.objectid = 0;
6217
6218                 if (nritems > 0)
6219                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6220                 level = btrfs_header_level(buf);
6221                 for (i = 0; i < nritems; i++) {
6222                         ptr = btrfs_node_blockptr(buf, i);
6223                         size = root->nodesize;
6224                         btrfs_node_key_to_cpu(buf, &key, i);
6225                         if (ri != NULL) {
6226                                 if ((level == ri->drop_level)
6227                                     && is_dropped_key(&key, &ri->drop_key)) {
6228                                         continue;
6229                                 }
6230                         }
6231                         ret = add_extent_rec(extent_cache, &key,
6232                                              btrfs_node_ptr_generation(buf, i),
6233                                              ptr, size, 0, 0, 1, 0, 1, 0,
6234                                              size);
6235                         BUG_ON(ret);
6236
6237                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6238
6239                         if (level > 1) {
6240                                 add_pending(nodes, seen, ptr, size);
6241                         } else {
6242                                 add_pending(pending, seen, ptr, size);
6243                         }
6244                 }
6245                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6246                                       nritems) * sizeof(struct btrfs_key_ptr);
6247         }
6248         total_btree_bytes += buf->len;
6249         if (fs_root_objectid(btrfs_header_owner(buf)))
6250                 total_fs_tree_bytes += buf->len;
6251         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6252                 total_extent_tree_bytes += buf->len;
6253         if (!found_old_backref &&
6254             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6255             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6256             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6257                 found_old_backref = 1;
6258 out:
6259         free_extent_buffer(buf);
6260         return ret;
6261 }
6262
6263 static int add_root_to_pending(struct extent_buffer *buf,
6264                                struct cache_tree *extent_cache,
6265                                struct cache_tree *pending,
6266                                struct cache_tree *seen,
6267                                struct cache_tree *nodes,
6268                                u64 objectid)
6269 {
6270         if (btrfs_header_level(buf) > 0)
6271                 add_pending(nodes, seen, buf->start, buf->len);
6272         else
6273                 add_pending(pending, seen, buf->start, buf->len);
6274         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6275                        0, 1, 1, 0, 1, 0, buf->len);
6276
6277         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6278             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6279                 add_tree_backref(extent_cache, buf->start, buf->start,
6280                                  0, 1);
6281         else
6282                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6283         return 0;
6284 }
6285
6286 /* as we fix the tree, we might be deleting blocks that
6287  * we're tracking for repair.  This hook makes sure we
6288  * remove any backrefs for blocks as we are fixing them.
6289  */
6290 static int free_extent_hook(struct btrfs_trans_handle *trans,
6291                             struct btrfs_root *root,
6292                             u64 bytenr, u64 num_bytes, u64 parent,
6293                             u64 root_objectid, u64 owner, u64 offset,
6294                             int refs_to_drop)
6295 {
6296         struct extent_record *rec;
6297         struct cache_extent *cache;
6298         int is_data;
6299         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6300
6301         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6302         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6303         if (!cache)
6304                 return 0;
6305
6306         rec = container_of(cache, struct extent_record, cache);
6307         if (is_data) {
6308                 struct data_backref *back;
6309                 back = find_data_backref(rec, parent, root_objectid, owner,
6310                                          offset, 1, bytenr, num_bytes);
6311                 if (!back)
6312                         goto out;
6313                 if (back->node.found_ref) {
6314                         back->found_ref -= refs_to_drop;
6315                         if (rec->refs)
6316                                 rec->refs -= refs_to_drop;
6317                 }
6318                 if (back->node.found_extent_tree) {
6319                         back->num_refs -= refs_to_drop;
6320                         if (rec->extent_item_refs)
6321                                 rec->extent_item_refs -= refs_to_drop;
6322                 }
6323                 if (back->found_ref == 0)
6324                         back->node.found_ref = 0;
6325                 if (back->num_refs == 0)
6326                         back->node.found_extent_tree = 0;
6327
6328                 if (!back->node.found_extent_tree && back->node.found_ref) {
6329                         list_del(&back->node.list);
6330                         free(back);
6331                 }
6332         } else {
6333                 struct tree_backref *back;
6334                 back = find_tree_backref(rec, parent, root_objectid);
6335                 if (!back)
6336                         goto out;
6337                 if (back->node.found_ref) {
6338                         if (rec->refs)
6339                                 rec->refs--;
6340                         back->node.found_ref = 0;
6341                 }
6342                 if (back->node.found_extent_tree) {
6343                         if (rec->extent_item_refs)
6344                                 rec->extent_item_refs--;
6345                         back->node.found_extent_tree = 0;
6346                 }
6347                 if (!back->node.found_extent_tree && back->node.found_ref) {
6348                         list_del(&back->node.list);
6349                         free(back);
6350                 }
6351         }
6352         maybe_free_extent_rec(extent_cache, rec);
6353 out:
6354         return 0;
6355 }
6356
6357 static int delete_extent_records(struct btrfs_trans_handle *trans,
6358                                  struct btrfs_root *root,
6359                                  struct btrfs_path *path,
6360                                  u64 bytenr, u64 new_len)
6361 {
6362         struct btrfs_key key;
6363         struct btrfs_key found_key;
6364         struct extent_buffer *leaf;
6365         int ret;
6366         int slot;
6367
6368
6369         key.objectid = bytenr;
6370         key.type = (u8)-1;
6371         key.offset = (u64)-1;
6372
6373         while(1) {
6374                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6375                                         &key, path, 0, 1);
6376                 if (ret < 0)
6377                         break;
6378
6379                 if (ret > 0) {
6380                         ret = 0;
6381                         if (path->slots[0] == 0)
6382                                 break;
6383                         path->slots[0]--;
6384                 }
6385                 ret = 0;
6386
6387                 leaf = path->nodes[0];
6388                 slot = path->slots[0];
6389
6390                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6391                 if (found_key.objectid != bytenr)
6392                         break;
6393
6394                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6395                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6396                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6397                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6398                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6399                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6400                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6401                         btrfs_release_path(path);
6402                         if (found_key.type == 0) {
6403                                 if (found_key.offset == 0)
6404                                         break;
6405                                 key.offset = found_key.offset - 1;
6406                                 key.type = found_key.type;
6407                         }
6408                         key.type = found_key.type - 1;
6409                         key.offset = (u64)-1;
6410                         continue;
6411                 }
6412
6413                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6414                         found_key.objectid, found_key.type, found_key.offset);
6415
6416                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6417                 if (ret)
6418                         break;
6419                 btrfs_release_path(path);
6420
6421                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6422                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6423                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6424                                 found_key.offset : root->nodesize;
6425
6426                         ret = btrfs_update_block_group(trans, root, bytenr,
6427                                                        bytes, 0, 0);
6428                         if (ret)
6429                                 break;
6430                 }
6431         }
6432
6433         btrfs_release_path(path);
6434         return ret;
6435 }
6436
6437 /*
6438  * for a single backref, this will allocate a new extent
6439  * and add the backref to it.
6440  */
6441 static int record_extent(struct btrfs_trans_handle *trans,
6442                          struct btrfs_fs_info *info,
6443                          struct btrfs_path *path,
6444                          struct extent_record *rec,
6445                          struct extent_backref *back,
6446                          int allocated, u64 flags)
6447 {
6448         int ret;
6449         struct btrfs_root *extent_root = info->extent_root;
6450         struct extent_buffer *leaf;
6451         struct btrfs_key ins_key;
6452         struct btrfs_extent_item *ei;
6453         struct tree_backref *tback;
6454         struct data_backref *dback;
6455         struct btrfs_tree_block_info *bi;
6456
6457         if (!back->is_data)
6458                 rec->max_size = max_t(u64, rec->max_size,
6459                                     info->extent_root->nodesize);
6460
6461         if (!allocated) {
6462                 u32 item_size = sizeof(*ei);
6463
6464                 if (!back->is_data)
6465                         item_size += sizeof(*bi);
6466
6467                 ins_key.objectid = rec->start;
6468                 ins_key.offset = rec->max_size;
6469                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6470
6471                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6472                                         &ins_key, item_size);
6473                 if (ret)
6474                         goto fail;
6475
6476                 leaf = path->nodes[0];
6477                 ei = btrfs_item_ptr(leaf, path->slots[0],
6478                                     struct btrfs_extent_item);
6479
6480                 btrfs_set_extent_refs(leaf, ei, 0);
6481                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6482
6483                 if (back->is_data) {
6484                         btrfs_set_extent_flags(leaf, ei,
6485                                                BTRFS_EXTENT_FLAG_DATA);
6486                 } else {
6487                         struct btrfs_disk_key copy_key;;
6488
6489                         tback = (struct tree_backref *)back;
6490                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6491                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6492                                              sizeof(*bi));
6493
6494                         btrfs_set_disk_key_objectid(&copy_key,
6495                                                     rec->info_objectid);
6496                         btrfs_set_disk_key_type(&copy_key, 0);
6497                         btrfs_set_disk_key_offset(&copy_key, 0);
6498
6499                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6500                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6501
6502                         btrfs_set_extent_flags(leaf, ei,
6503                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6504                 }
6505
6506                 btrfs_mark_buffer_dirty(leaf);
6507                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6508                                                rec->max_size, 1, 0);
6509                 if (ret)
6510                         goto fail;
6511                 btrfs_release_path(path);
6512         }
6513
6514         if (back->is_data) {
6515                 u64 parent;
6516                 int i;
6517
6518                 dback = (struct data_backref *)back;
6519                 if (back->full_backref)
6520                         parent = dback->parent;
6521                 else
6522                         parent = 0;
6523
6524                 for (i = 0; i < dback->found_ref; i++) {
6525                         /* if parent != 0, we're doing a full backref
6526                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6527                          * just makes the backref allocator create a data
6528                          * backref
6529                          */
6530                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6531                                                    rec->start, rec->max_size,
6532                                                    parent,
6533                                                    dback->root,
6534                                                    parent ?
6535                                                    BTRFS_FIRST_FREE_OBJECTID :
6536                                                    dback->owner,
6537                                                    dback->offset);
6538                         if (ret)
6539                                 break;
6540                 }
6541                 fprintf(stderr, "adding new data backref"
6542                                 " on %llu %s %llu owner %llu"
6543                                 " offset %llu found %d\n",
6544                                 (unsigned long long)rec->start,
6545                                 back->full_backref ?
6546                                 "parent" : "root",
6547                                 back->full_backref ?
6548                                 (unsigned long long)parent :
6549                                 (unsigned long long)dback->root,
6550                                 (unsigned long long)dback->owner,
6551                                 (unsigned long long)dback->offset,
6552                                 dback->found_ref);
6553         } else {
6554                 u64 parent;
6555
6556                 tback = (struct tree_backref *)back;
6557                 if (back->full_backref)
6558                         parent = tback->parent;
6559                 else
6560                         parent = 0;
6561
6562                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6563                                            rec->start, rec->max_size,
6564                                            parent, tback->root, 0, 0);
6565                 fprintf(stderr, "adding new tree backref on "
6566                         "start %llu len %llu parent %llu root %llu\n",
6567                         rec->start, rec->max_size, parent, tback->root);
6568         }
6569 fail:
6570         btrfs_release_path(path);
6571         return ret;
6572 }
6573
6574 static struct extent_entry *find_entry(struct list_head *entries,
6575                                        u64 bytenr, u64 bytes)
6576 {
6577         struct extent_entry *entry = NULL;
6578
6579         list_for_each_entry(entry, entries, list) {
6580                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6581                         return entry;
6582         }
6583
6584         return NULL;
6585 }
6586
6587 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6588 {
6589         struct extent_entry *entry, *best = NULL, *prev = NULL;
6590
6591         list_for_each_entry(entry, entries, list) {
6592                 if (!prev) {
6593                         prev = entry;
6594                         continue;
6595                 }
6596
6597                 /*
6598                  * If there are as many broken entries as entries then we know
6599                  * not to trust this particular entry.
6600                  */
6601                 if (entry->broken == entry->count)
6602                         continue;
6603
6604                 /*
6605                  * If our current entry == best then we can't be sure our best
6606                  * is really the best, so we need to keep searching.
6607                  */
6608                 if (best && best->count == entry->count) {
6609                         prev = entry;
6610                         best = NULL;
6611                         continue;
6612                 }
6613
6614                 /* Prev == entry, not good enough, have to keep searching */
6615                 if (!prev->broken && prev->count == entry->count)
6616                         continue;
6617
6618                 if (!best)
6619                         best = (prev->count > entry->count) ? prev : entry;
6620                 else if (best->count < entry->count)
6621                         best = entry;
6622                 prev = entry;
6623         }
6624
6625         return best;
6626 }
6627
6628 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6629                       struct data_backref *dback, struct extent_entry *entry)
6630 {
6631         struct btrfs_trans_handle *trans;
6632         struct btrfs_root *root;
6633         struct btrfs_file_extent_item *fi;
6634         struct extent_buffer *leaf;
6635         struct btrfs_key key;
6636         u64 bytenr, bytes;
6637         int ret, err;
6638
6639         key.objectid = dback->root;
6640         key.type = BTRFS_ROOT_ITEM_KEY;
6641         key.offset = (u64)-1;
6642         root = btrfs_read_fs_root(info, &key);
6643         if (IS_ERR(root)) {
6644                 fprintf(stderr, "Couldn't find root for our ref\n");
6645                 return -EINVAL;
6646         }
6647
6648         /*
6649          * The backref points to the original offset of the extent if it was
6650          * split, so we need to search down to the offset we have and then walk
6651          * forward until we find the backref we're looking for.
6652          */
6653         key.objectid = dback->owner;
6654         key.type = BTRFS_EXTENT_DATA_KEY;
6655         key.offset = dback->offset;
6656         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6657         if (ret < 0) {
6658                 fprintf(stderr, "Error looking up ref %d\n", ret);
6659                 return ret;
6660         }
6661
6662         while (1) {
6663                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6664                         ret = btrfs_next_leaf(root, path);
6665                         if (ret) {
6666                                 fprintf(stderr, "Couldn't find our ref, next\n");
6667                                 return -EINVAL;
6668                         }
6669                 }
6670                 leaf = path->nodes[0];
6671                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6672                 if (key.objectid != dback->owner ||
6673                     key.type != BTRFS_EXTENT_DATA_KEY) {
6674                         fprintf(stderr, "Couldn't find our ref, search\n");
6675                         return -EINVAL;
6676                 }
6677                 fi = btrfs_item_ptr(leaf, path->slots[0],
6678                                     struct btrfs_file_extent_item);
6679                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6680                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6681
6682                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6683                         break;
6684                 path->slots[0]++;
6685         }
6686
6687         btrfs_release_path(path);
6688
6689         trans = btrfs_start_transaction(root, 1);
6690         if (IS_ERR(trans))
6691                 return PTR_ERR(trans);
6692
6693         /*
6694          * Ok we have the key of the file extent we want to fix, now we can cow
6695          * down to the thing and fix it.
6696          */
6697         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6698         if (ret < 0) {
6699                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6700                         key.objectid, key.type, key.offset, ret);
6701                 goto out;
6702         }
6703         if (ret > 0) {
6704                 fprintf(stderr, "Well that's odd, we just found this key "
6705                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6706                         key.offset);
6707                 ret = -EINVAL;
6708                 goto out;
6709         }
6710         leaf = path->nodes[0];
6711         fi = btrfs_item_ptr(leaf, path->slots[0],
6712                             struct btrfs_file_extent_item);
6713
6714         if (btrfs_file_extent_compression(leaf, fi) &&
6715             dback->disk_bytenr != entry->bytenr) {
6716                 fprintf(stderr, "Ref doesn't match the record start and is "
6717                         "compressed, please take a btrfs-image of this file "
6718                         "system and send it to a btrfs developer so they can "
6719                         "complete this functionality for bytenr %Lu\n",
6720                         dback->disk_bytenr);
6721                 ret = -EINVAL;
6722                 goto out;
6723         }
6724
6725         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6726                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6727         } else if (dback->disk_bytenr > entry->bytenr) {
6728                 u64 off_diff, offset;
6729
6730                 off_diff = dback->disk_bytenr - entry->bytenr;
6731                 offset = btrfs_file_extent_offset(leaf, fi);
6732                 if (dback->disk_bytenr + offset +
6733                     btrfs_file_extent_num_bytes(leaf, fi) >
6734                     entry->bytenr + entry->bytes) {
6735                         fprintf(stderr, "Ref is past the entry end, please "
6736                                 "take a btrfs-image of this file system and "
6737                                 "send it to a btrfs developer, ref %Lu\n",
6738                                 dback->disk_bytenr);
6739                         ret = -EINVAL;
6740                         goto out;
6741                 }
6742                 offset += off_diff;
6743                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6744                 btrfs_set_file_extent_offset(leaf, fi, offset);
6745         } else if (dback->disk_bytenr < entry->bytenr) {
6746                 u64 offset;
6747
6748                 offset = btrfs_file_extent_offset(leaf, fi);
6749                 if (dback->disk_bytenr + offset < entry->bytenr) {
6750                         fprintf(stderr, "Ref is before the entry start, please"
6751                                 " take a btrfs-image of this file system and "
6752                                 "send it to a btrfs developer, ref %Lu\n",
6753                                 dback->disk_bytenr);
6754                         ret = -EINVAL;
6755                         goto out;
6756                 }
6757
6758                 offset += dback->disk_bytenr;
6759                 offset -= entry->bytenr;
6760                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6761                 btrfs_set_file_extent_offset(leaf, fi, offset);
6762         }
6763
6764         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6765
6766         /*
6767          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6768          * only do this if we aren't using compression, otherwise it's a
6769          * trickier case.
6770          */
6771         if (!btrfs_file_extent_compression(leaf, fi))
6772                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6773         else
6774                 printf("ram bytes may be wrong?\n");
6775         btrfs_mark_buffer_dirty(leaf);
6776 out:
6777         err = btrfs_commit_transaction(trans, root);
6778         btrfs_release_path(path);
6779         return ret ? ret : err;
6780 }
6781
6782 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6783                            struct extent_record *rec)
6784 {
6785         struct extent_backref *back;
6786         struct data_backref *dback;
6787         struct extent_entry *entry, *best = NULL;
6788         LIST_HEAD(entries);
6789         int nr_entries = 0;
6790         int broken_entries = 0;
6791         int ret = 0;
6792         short mismatch = 0;
6793
6794         /*
6795          * Metadata is easy and the backrefs should always agree on bytenr and
6796          * size, if not we've got bigger issues.
6797          */
6798         if (rec->metadata)
6799                 return 0;
6800
6801         list_for_each_entry(back, &rec->backrefs, list) {
6802                 if (back->full_backref || !back->is_data)
6803                         continue;
6804
6805                 dback = (struct data_backref *)back;
6806
6807                 /*
6808                  * We only pay attention to backrefs that we found a real
6809                  * backref for.
6810                  */
6811                 if (dback->found_ref == 0)
6812                         continue;
6813
6814                 /*
6815                  * For now we only catch when the bytes don't match, not the
6816                  * bytenr.  We can easily do this at the same time, but I want
6817                  * to have a fs image to test on before we just add repair
6818                  * functionality willy-nilly so we know we won't screw up the
6819                  * repair.
6820                  */
6821
6822                 entry = find_entry(&entries, dback->disk_bytenr,
6823                                    dback->bytes);
6824                 if (!entry) {
6825                         entry = malloc(sizeof(struct extent_entry));
6826                         if (!entry) {
6827                                 ret = -ENOMEM;
6828                                 goto out;
6829                         }
6830                         memset(entry, 0, sizeof(*entry));
6831                         entry->bytenr = dback->disk_bytenr;
6832                         entry->bytes = dback->bytes;
6833                         list_add_tail(&entry->list, &entries);
6834                         nr_entries++;
6835                 }
6836
6837                 /*
6838                  * If we only have on entry we may think the entries agree when
6839                  * in reality they don't so we have to do some extra checking.
6840                  */
6841                 if (dback->disk_bytenr != rec->start ||
6842                     dback->bytes != rec->nr || back->broken)
6843                         mismatch = 1;
6844
6845                 if (back->broken) {
6846                         entry->broken++;
6847                         broken_entries++;
6848                 }
6849
6850                 entry->count++;
6851         }
6852
6853         /* Yay all the backrefs agree, carry on good sir */
6854         if (nr_entries <= 1 && !mismatch)
6855                 goto out;
6856
6857         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6858                 "%Lu\n", rec->start);
6859
6860         /*
6861          * First we want to see if the backrefs can agree amongst themselves who
6862          * is right, so figure out which one of the entries has the highest
6863          * count.
6864          */
6865         best = find_most_right_entry(&entries);
6866
6867         /*
6868          * Ok so we may have an even split between what the backrefs think, so
6869          * this is where we use the extent ref to see what it thinks.
6870          */
6871         if (!best) {
6872                 entry = find_entry(&entries, rec->start, rec->nr);
6873                 if (!entry && (!broken_entries || !rec->found_rec)) {
6874                         fprintf(stderr, "Backrefs don't agree with each other "
6875                                 "and extent record doesn't agree with anybody,"
6876                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6877                                 rec->start, rec->nr);
6878                         ret = -EINVAL;
6879                         goto out;
6880                 } else if (!entry) {
6881                         /*
6882                          * Ok our backrefs were broken, we'll assume this is the
6883                          * correct value and add an entry for this range.
6884                          */
6885                         entry = malloc(sizeof(struct extent_entry));
6886                         if (!entry) {
6887                                 ret = -ENOMEM;
6888                                 goto out;
6889                         }
6890                         memset(entry, 0, sizeof(*entry));
6891                         entry->bytenr = rec->start;
6892                         entry->bytes = rec->nr;
6893                         list_add_tail(&entry->list, &entries);
6894                         nr_entries++;
6895                 }
6896                 entry->count++;
6897                 best = find_most_right_entry(&entries);
6898                 if (!best) {
6899                         fprintf(stderr, "Backrefs and extent record evenly "
6900                                 "split on who is right, this is going to "
6901                                 "require user input to fix bytenr %Lu bytes "
6902                                 "%Lu\n", rec->start, rec->nr);
6903                         ret = -EINVAL;
6904                         goto out;
6905                 }
6906         }
6907
6908         /*
6909          * I don't think this can happen currently as we'll abort() if we catch
6910          * this case higher up, but in case somebody removes that we still can't
6911          * deal with it properly here yet, so just bail out of that's the case.
6912          */
6913         if (best->bytenr != rec->start) {
6914                 fprintf(stderr, "Extent start and backref starts don't match, "
6915                         "please use btrfs-image on this file system and send "
6916                         "it to a btrfs developer so they can make fsck fix "
6917                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6918                         rec->start, rec->nr);
6919                 ret = -EINVAL;
6920                 goto out;
6921         }
6922
6923         /*
6924          * Ok great we all agreed on an extent record, let's go find the real
6925          * references and fix up the ones that don't match.
6926          */
6927         list_for_each_entry(back, &rec->backrefs, list) {
6928                 if (back->full_backref || !back->is_data)
6929                         continue;
6930
6931                 dback = (struct data_backref *)back;
6932
6933                 /*
6934                  * Still ignoring backrefs that don't have a real ref attached
6935                  * to them.
6936                  */
6937                 if (dback->found_ref == 0)
6938                         continue;
6939
6940                 if (dback->bytes == best->bytes &&
6941                     dback->disk_bytenr == best->bytenr)
6942                         continue;
6943
6944                 ret = repair_ref(info, path, dback, best);
6945                 if (ret)
6946                         goto out;
6947         }
6948
6949         /*
6950          * Ok we messed with the actual refs, which means we need to drop our
6951          * entire cache and go back and rescan.  I know this is a huge pain and
6952          * adds a lot of extra work, but it's the only way to be safe.  Once all
6953          * the backrefs agree we may not need to do anything to the extent
6954          * record itself.
6955          */
6956         ret = -EAGAIN;
6957 out:
6958         while (!list_empty(&entries)) {
6959                 entry = list_entry(entries.next, struct extent_entry, list);
6960                 list_del_init(&entry->list);
6961                 free(entry);
6962         }
6963         return ret;
6964 }
6965
6966 static int process_duplicates(struct btrfs_root *root,
6967                               struct cache_tree *extent_cache,
6968                               struct extent_record *rec)
6969 {
6970         struct extent_record *good, *tmp;
6971         struct cache_extent *cache;
6972         int ret;
6973
6974         /*
6975          * If we found a extent record for this extent then return, or if we
6976          * have more than one duplicate we are likely going to need to delete
6977          * something.
6978          */
6979         if (rec->found_rec || rec->num_duplicates > 1)
6980                 return 0;
6981
6982         /* Shouldn't happen but just in case */
6983         BUG_ON(!rec->num_duplicates);
6984
6985         /*
6986          * So this happens if we end up with a backref that doesn't match the
6987          * actual extent entry.  So either the backref is bad or the extent
6988          * entry is bad.  Either way we want to have the extent_record actually
6989          * reflect what we found in the extent_tree, so we need to take the
6990          * duplicate out and use that as the extent_record since the only way we
6991          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6992          */
6993         remove_cache_extent(extent_cache, &rec->cache);
6994
6995         good = list_entry(rec->dups.next, struct extent_record, list);
6996         list_del_init(&good->list);
6997         INIT_LIST_HEAD(&good->backrefs);
6998         INIT_LIST_HEAD(&good->dups);
6999         good->cache.start = good->start;
7000         good->cache.size = good->nr;
7001         good->content_checked = 0;
7002         good->owner_ref_checked = 0;
7003         good->num_duplicates = 0;
7004         good->refs = rec->refs;
7005         list_splice_init(&rec->backrefs, &good->backrefs);
7006         while (1) {
7007                 cache = lookup_cache_extent(extent_cache, good->start,
7008                                             good->nr);
7009                 if (!cache)
7010                         break;
7011                 tmp = container_of(cache, struct extent_record, cache);
7012
7013                 /*
7014                  * If we find another overlapping extent and it's found_rec is
7015                  * set then it's a duplicate and we need to try and delete
7016                  * something.
7017                  */
7018                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7019                         if (list_empty(&good->list))
7020                                 list_add_tail(&good->list,
7021                                               &duplicate_extents);
7022                         good->num_duplicates += tmp->num_duplicates + 1;
7023                         list_splice_init(&tmp->dups, &good->dups);
7024                         list_del_init(&tmp->list);
7025                         list_add_tail(&tmp->list, &good->dups);
7026                         remove_cache_extent(extent_cache, &tmp->cache);
7027                         continue;
7028                 }
7029
7030                 /*
7031                  * Ok we have another non extent item backed extent rec, so lets
7032                  * just add it to this extent and carry on like we did above.
7033                  */
7034                 good->refs += tmp->refs;
7035                 list_splice_init(&tmp->backrefs, &good->backrefs);
7036                 remove_cache_extent(extent_cache, &tmp->cache);
7037                 free(tmp);
7038         }
7039         ret = insert_cache_extent(extent_cache, &good->cache);
7040         BUG_ON(ret);
7041         free(rec);
7042         return good->num_duplicates ? 0 : 1;
7043 }
7044
7045 static int delete_duplicate_records(struct btrfs_root *root,
7046                                     struct extent_record *rec)
7047 {
7048         struct btrfs_trans_handle *trans;
7049         LIST_HEAD(delete_list);
7050         struct btrfs_path *path;
7051         struct extent_record *tmp, *good, *n;
7052         int nr_del = 0;
7053         int ret = 0, err;
7054         struct btrfs_key key;
7055
7056         path = btrfs_alloc_path();
7057         if (!path) {
7058                 ret = -ENOMEM;
7059                 goto out;
7060         }
7061
7062         good = rec;
7063         /* Find the record that covers all of the duplicates. */
7064         list_for_each_entry(tmp, &rec->dups, list) {
7065                 if (good->start < tmp->start)
7066                         continue;
7067                 if (good->nr > tmp->nr)
7068                         continue;
7069
7070                 if (tmp->start + tmp->nr < good->start + good->nr) {
7071                         fprintf(stderr, "Ok we have overlapping extents that "
7072                                 "aren't completely covered by eachother, this "
7073                                 "is going to require more careful thought.  "
7074                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7075                                 tmp->start, tmp->nr, good->start, good->nr);
7076                         abort();
7077                 }
7078                 good = tmp;
7079         }
7080
7081         if (good != rec)
7082                 list_add_tail(&rec->list, &delete_list);
7083
7084         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7085                 if (tmp == good)
7086                         continue;
7087                 list_move_tail(&tmp->list, &delete_list);
7088         }
7089
7090         root = root->fs_info->extent_root;
7091         trans = btrfs_start_transaction(root, 1);
7092         if (IS_ERR(trans)) {
7093                 ret = PTR_ERR(trans);
7094                 goto out;
7095         }
7096
7097         list_for_each_entry(tmp, &delete_list, list) {
7098                 if (tmp->found_rec == 0)
7099                         continue;
7100                 key.objectid = tmp->start;
7101                 key.type = BTRFS_EXTENT_ITEM_KEY;
7102                 key.offset = tmp->nr;
7103
7104                 /* Shouldn't happen but just in case */
7105                 if (tmp->metadata) {
7106                         fprintf(stderr, "Well this shouldn't happen, extent "
7107                                 "record overlaps but is metadata? "
7108                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7109                         abort();
7110                 }
7111
7112                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7113                 if (ret) {
7114                         if (ret > 0)
7115                                 ret = -EINVAL;
7116                         break;
7117                 }
7118                 ret = btrfs_del_item(trans, root, path);
7119                 if (ret)
7120                         break;
7121                 btrfs_release_path(path);
7122                 nr_del++;
7123         }
7124         err = btrfs_commit_transaction(trans, root);
7125         if (err && !ret)
7126                 ret = err;
7127 out:
7128         while (!list_empty(&delete_list)) {
7129                 tmp = list_entry(delete_list.next, struct extent_record, list);
7130                 list_del_init(&tmp->list);
7131                 if (tmp == rec)
7132                         continue;
7133                 free(tmp);
7134         }
7135
7136         while (!list_empty(&rec->dups)) {
7137                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7138                 list_del_init(&tmp->list);
7139                 free(tmp);
7140         }
7141
7142         btrfs_free_path(path);
7143
7144         if (!ret && !nr_del)
7145                 rec->num_duplicates = 0;
7146
7147         return ret ? ret : nr_del;
7148 }
7149
7150 static int find_possible_backrefs(struct btrfs_fs_info *info,
7151                                   struct btrfs_path *path,
7152                                   struct cache_tree *extent_cache,
7153                                   struct extent_record *rec)
7154 {
7155         struct btrfs_root *root;
7156         struct extent_backref *back;
7157         struct data_backref *dback;
7158         struct cache_extent *cache;
7159         struct btrfs_file_extent_item *fi;
7160         struct btrfs_key key;
7161         u64 bytenr, bytes;
7162         int ret;
7163
7164         list_for_each_entry(back, &rec->backrefs, list) {
7165                 /* Don't care about full backrefs (poor unloved backrefs) */
7166                 if (back->full_backref || !back->is_data)
7167                         continue;
7168
7169                 dback = (struct data_backref *)back;
7170
7171                 /* We found this one, we don't need to do a lookup */
7172                 if (dback->found_ref)
7173                         continue;
7174
7175                 key.objectid = dback->root;
7176                 key.type = BTRFS_ROOT_ITEM_KEY;
7177                 key.offset = (u64)-1;
7178
7179                 root = btrfs_read_fs_root(info, &key);
7180
7181                 /* No root, definitely a bad ref, skip */
7182                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7183                         continue;
7184                 /* Other err, exit */
7185                 if (IS_ERR(root))
7186                         return PTR_ERR(root);
7187
7188                 key.objectid = dback->owner;
7189                 key.type = BTRFS_EXTENT_DATA_KEY;
7190                 key.offset = dback->offset;
7191                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7192                 if (ret) {
7193                         btrfs_release_path(path);
7194                         if (ret < 0)
7195                                 return ret;
7196                         /* Didn't find it, we can carry on */
7197                         ret = 0;
7198                         continue;
7199                 }
7200
7201                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7202                                     struct btrfs_file_extent_item);
7203                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7204                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7205                 btrfs_release_path(path);
7206                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7207                 if (cache) {
7208                         struct extent_record *tmp;
7209                         tmp = container_of(cache, struct extent_record, cache);
7210
7211                         /*
7212                          * If we found an extent record for the bytenr for this
7213                          * particular backref then we can't add it to our
7214                          * current extent record.  We only want to add backrefs
7215                          * that don't have a corresponding extent item in the
7216                          * extent tree since they likely belong to this record
7217                          * and we need to fix it if it doesn't match bytenrs.
7218                          */
7219                         if  (tmp->found_rec)
7220                                 continue;
7221                 }
7222
7223                 dback->found_ref += 1;
7224                 dback->disk_bytenr = bytenr;
7225                 dback->bytes = bytes;
7226
7227                 /*
7228                  * Set this so the verify backref code knows not to trust the
7229                  * values in this backref.
7230                  */
7231                 back->broken = 1;
7232         }
7233
7234         return 0;
7235 }
7236
7237 /*
7238  * Record orphan data ref into corresponding root.
7239  *
7240  * Return 0 if the extent item contains data ref and recorded.
7241  * Return 1 if the extent item contains no useful data ref
7242  *   On that case, it may contains only shared_dataref or metadata backref
7243  *   or the file extent exists(this should be handled by the extent bytenr
7244  *   recovery routine)
7245  * Return <0 if something goes wrong.
7246  */
7247 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7248                                       struct extent_record *rec)
7249 {
7250         struct btrfs_key key;
7251         struct btrfs_root *dest_root;
7252         struct extent_backref *back;
7253         struct data_backref *dback;
7254         struct orphan_data_extent *orphan;
7255         struct btrfs_path *path;
7256         int recorded_data_ref = 0;
7257         int ret = 0;
7258
7259         if (rec->metadata)
7260                 return 1;
7261         path = btrfs_alloc_path();
7262         if (!path)
7263                 return -ENOMEM;
7264         list_for_each_entry(back, &rec->backrefs, list) {
7265                 if (back->full_backref || !back->is_data ||
7266                     !back->found_extent_tree)
7267                         continue;
7268                 dback = (struct data_backref *)back;
7269                 if (dback->found_ref)
7270                         continue;
7271                 key.objectid = dback->root;
7272                 key.type = BTRFS_ROOT_ITEM_KEY;
7273                 key.offset = (u64)-1;
7274
7275                 dest_root = btrfs_read_fs_root(fs_info, &key);
7276
7277                 /* For non-exist root we just skip it */
7278                 if (IS_ERR(dest_root) || !dest_root)
7279                         continue;
7280
7281                 key.objectid = dback->owner;
7282                 key.type = BTRFS_EXTENT_DATA_KEY;
7283                 key.offset = dback->offset;
7284
7285                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7286                 /*
7287                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7288                  * we need to record it for inode/file extent rebuild.
7289                  * For ret > 0, we record it only for file extent rebuild.
7290                  * For ret == 0, the file extent exists but only bytenr
7291                  * mismatch, let the original bytenr fix routine to handle,
7292                  * don't record it.
7293                  */
7294                 if (ret == 0)
7295                         continue;
7296                 ret = 0;
7297                 orphan = malloc(sizeof(*orphan));
7298                 if (!orphan) {
7299                         ret = -ENOMEM;
7300                         goto out;
7301                 }
7302                 INIT_LIST_HEAD(&orphan->list);
7303                 orphan->root = dback->root;
7304                 orphan->objectid = dback->owner;
7305                 orphan->offset = dback->offset;
7306                 orphan->disk_bytenr = rec->cache.start;
7307                 orphan->disk_len = rec->cache.size;
7308                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7309                 recorded_data_ref = 1;
7310         }
7311 out:
7312         btrfs_free_path(path);
7313         if (!ret)
7314                 return !recorded_data_ref;
7315         else
7316                 return ret;
7317 }
7318
7319 /*
7320  * when an incorrect extent item is found, this will delete
7321  * all of the existing entries for it and recreate them
7322  * based on what the tree scan found.
7323  */
7324 static int fixup_extent_refs(struct btrfs_fs_info *info,
7325                              struct cache_tree *extent_cache,
7326                              struct extent_record *rec)
7327 {
7328         struct btrfs_trans_handle *trans = NULL;
7329         int ret;
7330         struct btrfs_path *path;
7331         struct list_head *cur = rec->backrefs.next;
7332         struct cache_extent *cache;
7333         struct extent_backref *back;
7334         int allocated = 0;
7335         u64 flags = 0;
7336
7337         if (rec->flag_block_full_backref)
7338                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7339
7340         path = btrfs_alloc_path();
7341         if (!path)
7342                 return -ENOMEM;
7343
7344         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7345                 /*
7346                  * Sometimes the backrefs themselves are so broken they don't
7347                  * get attached to any meaningful rec, so first go back and
7348                  * check any of our backrefs that we couldn't find and throw
7349                  * them into the list if we find the backref so that
7350                  * verify_backrefs can figure out what to do.
7351                  */
7352                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7353                 if (ret < 0)
7354                         goto out;
7355         }
7356
7357         /* step one, make sure all of the backrefs agree */
7358         ret = verify_backrefs(info, path, rec);
7359         if (ret < 0)
7360                 goto out;
7361
7362         trans = btrfs_start_transaction(info->extent_root, 1);
7363         if (IS_ERR(trans)) {
7364                 ret = PTR_ERR(trans);
7365                 goto out;
7366         }
7367
7368         /* step two, delete all the existing records */
7369         ret = delete_extent_records(trans, info->extent_root, path,
7370                                     rec->start, rec->max_size);
7371
7372         if (ret < 0)
7373                 goto out;
7374
7375         /* was this block corrupt?  If so, don't add references to it */
7376         cache = lookup_cache_extent(info->corrupt_blocks,
7377                                     rec->start, rec->max_size);
7378         if (cache) {
7379                 ret = 0;
7380                 goto out;
7381         }
7382
7383         /* step three, recreate all the refs we did find */
7384         while(cur != &rec->backrefs) {
7385                 back = list_entry(cur, struct extent_backref, list);
7386                 cur = cur->next;
7387
7388                 /*
7389                  * if we didn't find any references, don't create a
7390                  * new extent record
7391                  */
7392                 if (!back->found_ref)
7393                         continue;
7394
7395                 rec->bad_full_backref = 0;
7396                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7397                 allocated = 1;
7398
7399                 if (ret)
7400                         goto out;
7401         }
7402 out:
7403         if (trans) {
7404                 int err = btrfs_commit_transaction(trans, info->extent_root);
7405                 if (!ret)
7406                         ret = err;
7407         }
7408
7409         btrfs_free_path(path);
7410         return ret;
7411 }
7412
7413 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7414                               struct extent_record *rec)
7415 {
7416         struct btrfs_trans_handle *trans;
7417         struct btrfs_root *root = fs_info->extent_root;
7418         struct btrfs_path *path;
7419         struct btrfs_extent_item *ei;
7420         struct btrfs_key key;
7421         u64 flags;
7422         int ret = 0;
7423
7424         key.objectid = rec->start;
7425         if (rec->metadata) {
7426                 key.type = BTRFS_METADATA_ITEM_KEY;
7427                 key.offset = rec->info_level;
7428         } else {
7429                 key.type = BTRFS_EXTENT_ITEM_KEY;
7430                 key.offset = rec->max_size;
7431         }
7432
7433         path = btrfs_alloc_path();
7434         if (!path)
7435                 return -ENOMEM;
7436
7437         trans = btrfs_start_transaction(root, 0);
7438         if (IS_ERR(trans)) {
7439                 btrfs_free_path(path);
7440                 return PTR_ERR(trans);
7441         }
7442
7443         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7444         if (ret < 0) {
7445                 btrfs_free_path(path);
7446                 btrfs_commit_transaction(trans, root);
7447                 return ret;
7448         } else if (ret) {
7449                 fprintf(stderr, "Didn't find extent for %llu\n",
7450                         (unsigned long long)rec->start);
7451                 btrfs_free_path(path);
7452                 btrfs_commit_transaction(trans, root);
7453                 return -ENOENT;
7454         }
7455
7456         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7457                             struct btrfs_extent_item);
7458         flags = btrfs_extent_flags(path->nodes[0], ei);
7459         if (rec->flag_block_full_backref) {
7460                 fprintf(stderr, "setting full backref on %llu\n",
7461                         (unsigned long long)key.objectid);
7462                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7463         } else {
7464                 fprintf(stderr, "clearing full backref on %llu\n",
7465                         (unsigned long long)key.objectid);
7466                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7467         }
7468         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7469         btrfs_mark_buffer_dirty(path->nodes[0]);
7470         btrfs_free_path(path);
7471         return btrfs_commit_transaction(trans, root);
7472 }
7473
7474 /* right now we only prune from the extent allocation tree */
7475 static int prune_one_block(struct btrfs_trans_handle *trans,
7476                            struct btrfs_fs_info *info,
7477                            struct btrfs_corrupt_block *corrupt)
7478 {
7479         int ret;
7480         struct btrfs_path path;
7481         struct extent_buffer *eb;
7482         u64 found;
7483         int slot;
7484         int nritems;
7485         int level = corrupt->level + 1;
7486
7487         btrfs_init_path(&path);
7488 again:
7489         /* we want to stop at the parent to our busted block */
7490         path.lowest_level = level;
7491
7492         ret = btrfs_search_slot(trans, info->extent_root,
7493                                 &corrupt->key, &path, -1, 1);
7494
7495         if (ret < 0)
7496                 goto out;
7497
7498         eb = path.nodes[level];
7499         if (!eb) {
7500                 ret = -ENOENT;
7501                 goto out;
7502         }
7503
7504         /*
7505          * hopefully the search gave us the block we want to prune,
7506          * lets try that first
7507          */
7508         slot = path.slots[level];
7509         found =  btrfs_node_blockptr(eb, slot);
7510         if (found == corrupt->cache.start)
7511                 goto del_ptr;
7512
7513         nritems = btrfs_header_nritems(eb);
7514
7515         /* the search failed, lets scan this node and hope we find it */
7516         for (slot = 0; slot < nritems; slot++) {
7517                 found =  btrfs_node_blockptr(eb, slot);
7518                 if (found == corrupt->cache.start)
7519                         goto del_ptr;
7520         }
7521         /*
7522          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7523          * to this block
7524          */
7525         if (eb == info->extent_root->node) {
7526                 ret = -ENOENT;
7527                 goto out;
7528         } else {
7529                 level++;
7530                 btrfs_release_path(&path);
7531                 goto again;
7532         }
7533
7534 del_ptr:
7535         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7536         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7537
7538 out:
7539         btrfs_release_path(&path);
7540         return ret;
7541 }
7542
7543 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7544 {
7545         struct btrfs_trans_handle *trans = NULL;
7546         struct cache_extent *cache;
7547         struct btrfs_corrupt_block *corrupt;
7548
7549         while (1) {
7550                 cache = search_cache_extent(info->corrupt_blocks, 0);
7551                 if (!cache)
7552                         break;
7553                 if (!trans) {
7554                         trans = btrfs_start_transaction(info->extent_root, 1);
7555                         if (IS_ERR(trans))
7556                                 return PTR_ERR(trans);
7557                 }
7558                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7559                 prune_one_block(trans, info, corrupt);
7560                 remove_cache_extent(info->corrupt_blocks, cache);
7561         }
7562         if (trans)
7563                 return btrfs_commit_transaction(trans, info->extent_root);
7564         return 0;
7565 }
7566
7567 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7568 {
7569         struct btrfs_block_group_cache *cache;
7570         u64 start, end;
7571         int ret;
7572
7573         while (1) {
7574                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7575                                             &start, &end, EXTENT_DIRTY);
7576                 if (ret)
7577                         break;
7578                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7579                                    GFP_NOFS);
7580         }
7581
7582         start = 0;
7583         while (1) {
7584                 cache = btrfs_lookup_first_block_group(fs_info, start);
7585                 if (!cache)
7586                         break;
7587                 if (cache->cached)
7588                         cache->cached = 0;
7589                 start = cache->key.objectid + cache->key.offset;
7590         }
7591 }
7592
7593 static int check_extent_refs(struct btrfs_root *root,
7594                              struct cache_tree *extent_cache)
7595 {
7596         struct extent_record *rec;
7597         struct cache_extent *cache;
7598         int err = 0;
7599         int ret = 0;
7600         int fixed = 0;
7601         int had_dups = 0;
7602         int recorded = 0;
7603
7604         if (repair) {
7605                 /*
7606                  * if we're doing a repair, we have to make sure
7607                  * we don't allocate from the problem extents.
7608                  * In the worst case, this will be all the
7609                  * extents in the FS
7610                  */
7611                 cache = search_cache_extent(extent_cache, 0);
7612                 while(cache) {
7613                         rec = container_of(cache, struct extent_record, cache);
7614                         set_extent_dirty(root->fs_info->excluded_extents,
7615                                          rec->start,
7616                                          rec->start + rec->max_size - 1,
7617                                          GFP_NOFS);
7618                         cache = next_cache_extent(cache);
7619                 }
7620
7621                 /* pin down all the corrupted blocks too */
7622                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7623                 while(cache) {
7624                         set_extent_dirty(root->fs_info->excluded_extents,
7625                                          cache->start,
7626                                          cache->start + cache->size - 1,
7627                                          GFP_NOFS);
7628                         cache = next_cache_extent(cache);
7629                 }
7630                 prune_corrupt_blocks(root->fs_info);
7631                 reset_cached_block_groups(root->fs_info);
7632         }
7633
7634         reset_cached_block_groups(root->fs_info);
7635
7636         /*
7637          * We need to delete any duplicate entries we find first otherwise we
7638          * could mess up the extent tree when we have backrefs that actually
7639          * belong to a different extent item and not the weird duplicate one.
7640          */
7641         while (repair && !list_empty(&duplicate_extents)) {
7642                 rec = list_entry(duplicate_extents.next, struct extent_record,
7643                                  list);
7644                 list_del_init(&rec->list);
7645
7646                 /* Sometimes we can find a backref before we find an actual
7647                  * extent, so we need to process it a little bit to see if there
7648                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7649                  * if this is a backref screwup.  If we need to delete stuff
7650                  * process_duplicates() will return 0, otherwise it will return
7651                  * 1 and we
7652                  */
7653                 if (process_duplicates(root, extent_cache, rec))
7654                         continue;
7655                 ret = delete_duplicate_records(root, rec);
7656                 if (ret < 0)
7657                         return ret;
7658                 /*
7659                  * delete_duplicate_records will return the number of entries
7660                  * deleted, so if it's greater than 0 then we know we actually
7661                  * did something and we need to remove.
7662                  */
7663                 if (ret)
7664                         had_dups = 1;
7665         }
7666
7667         if (had_dups)
7668                 return -EAGAIN;
7669
7670         while(1) {
7671                 int cur_err = 0;
7672
7673                 fixed = 0;
7674                 recorded = 0;
7675                 cache = search_cache_extent(extent_cache, 0);
7676                 if (!cache)
7677                         break;
7678                 rec = container_of(cache, struct extent_record, cache);
7679                 if (rec->num_duplicates) {
7680                         fprintf(stderr, "extent item %llu has multiple extent "
7681                                 "items\n", (unsigned long long)rec->start);
7682                         err = 1;
7683                         cur_err = 1;
7684                 }
7685
7686                 if (rec->refs != rec->extent_item_refs) {
7687                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7688                                 (unsigned long long)rec->start,
7689                                 (unsigned long long)rec->nr);
7690                         fprintf(stderr, "extent item %llu, found %llu\n",
7691                                 (unsigned long long)rec->extent_item_refs,
7692                                 (unsigned long long)rec->refs);
7693                         ret = record_orphan_data_extents(root->fs_info, rec);
7694                         if (ret < 0)
7695                                 goto repair_abort;
7696                         if (ret == 0) {
7697                                 recorded = 1;
7698                         } else {
7699                                 /*
7700                                  * we can't use the extent to repair file
7701                                  * extent, let the fallback method handle it.
7702                                  */
7703                                 if (!fixed && repair) {
7704                                         ret = fixup_extent_refs(
7705                                                         root->fs_info,
7706                                                         extent_cache, rec);
7707                                         if (ret)
7708                                                 goto repair_abort;
7709                                         fixed = 1;
7710                                 }
7711                         }
7712                         err = 1;
7713                         cur_err = 1;
7714                 }
7715                 if (all_backpointers_checked(rec, 1)) {
7716                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7717                                 (unsigned long long)rec->start,
7718                                 (unsigned long long)rec->nr);
7719
7720                         if (!fixed && !recorded && repair) {
7721                                 ret = fixup_extent_refs(root->fs_info,
7722                                                         extent_cache, rec);
7723                                 if (ret)
7724                                         goto repair_abort;
7725                                 fixed = 1;
7726                         }
7727                         cur_err = 1;
7728                         err = 1;
7729                 }
7730                 if (!rec->owner_ref_checked) {
7731                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7732                                 (unsigned long long)rec->start,
7733                                 (unsigned long long)rec->nr);
7734                         if (!fixed && !recorded && repair) {
7735                                 ret = fixup_extent_refs(root->fs_info,
7736                                                         extent_cache, rec);
7737                                 if (ret)
7738                                         goto repair_abort;
7739                                 fixed = 1;
7740                         }
7741                         err = 1;
7742                         cur_err = 1;
7743                 }
7744                 if (rec->bad_full_backref) {
7745                         fprintf(stderr, "bad full backref, on [%llu]\n",
7746                                 (unsigned long long)rec->start);
7747                         if (repair) {
7748                                 ret = fixup_extent_flags(root->fs_info, rec);
7749                                 if (ret)
7750                                         goto repair_abort;
7751                                 fixed = 1;
7752                         }
7753                         err = 1;
7754                         cur_err = 1;
7755                 }
7756                 /*
7757                  * Although it's not a extent ref's problem, we reuse this
7758                  * routine for error reporting.
7759                  * No repair function yet.
7760                  */
7761                 if (rec->crossing_stripes) {
7762                         fprintf(stderr,
7763                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7764                                 rec->start, rec->start + rec->max_size);
7765                         err = 1;
7766                         cur_err = 1;
7767                 }
7768
7769                 if (rec->wrong_chunk_type) {
7770                         fprintf(stderr,
7771                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7772                                 rec->start, rec->start + rec->max_size);
7773                         err = 1;
7774                         cur_err = 1;
7775                 }
7776
7777                 remove_cache_extent(extent_cache, cache);
7778                 free_all_extent_backrefs(rec);
7779                 if (!init_extent_tree && repair && (!cur_err || fixed))
7780                         clear_extent_dirty(root->fs_info->excluded_extents,
7781                                            rec->start,
7782                                            rec->start + rec->max_size - 1,
7783                                            GFP_NOFS);
7784                 free(rec);
7785         }
7786 repair_abort:
7787         if (repair) {
7788                 if (ret && ret != -EAGAIN) {
7789                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7790                         exit(1);
7791                 } else if (!ret) {
7792                         struct btrfs_trans_handle *trans;
7793
7794                         root = root->fs_info->extent_root;
7795                         trans = btrfs_start_transaction(root, 1);
7796                         if (IS_ERR(trans)) {
7797                                 ret = PTR_ERR(trans);
7798                                 goto repair_abort;
7799                         }
7800
7801                         btrfs_fix_block_accounting(trans, root);
7802                         ret = btrfs_commit_transaction(trans, root);
7803                         if (ret)
7804                                 goto repair_abort;
7805                 }
7806                 if (err)
7807                         fprintf(stderr, "repaired damaged extent references\n");
7808                 return ret;
7809         }
7810         return err;
7811 }
7812
7813 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7814 {
7815         u64 stripe_size;
7816
7817         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7818                 stripe_size = length;
7819                 stripe_size /= num_stripes;
7820         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7821                 stripe_size = length * 2;
7822                 stripe_size /= num_stripes;
7823         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7824                 stripe_size = length;
7825                 stripe_size /= (num_stripes - 1);
7826         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7827                 stripe_size = length;
7828                 stripe_size /= (num_stripes - 2);
7829         } else {
7830                 stripe_size = length;
7831         }
7832         return stripe_size;
7833 }
7834
7835 /*
7836  * Check the chunk with its block group/dev list ref:
7837  * Return 0 if all refs seems valid.
7838  * Return 1 if part of refs seems valid, need later check for rebuild ref
7839  * like missing block group and needs to search extent tree to rebuild them.
7840  * Return -1 if essential refs are missing and unable to rebuild.
7841  */
7842 static int check_chunk_refs(struct chunk_record *chunk_rec,
7843                             struct block_group_tree *block_group_cache,
7844                             struct device_extent_tree *dev_extent_cache,
7845                             int silent)
7846 {
7847         struct cache_extent *block_group_item;
7848         struct block_group_record *block_group_rec;
7849         struct cache_extent *dev_extent_item;
7850         struct device_extent_record *dev_extent_rec;
7851         u64 devid;
7852         u64 offset;
7853         u64 length;
7854         int metadump_v2 = 0;
7855         int i;
7856         int ret = 0;
7857
7858         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7859                                                chunk_rec->offset,
7860                                                chunk_rec->length);
7861         if (block_group_item) {
7862                 block_group_rec = container_of(block_group_item,
7863                                                struct block_group_record,
7864                                                cache);
7865                 if (chunk_rec->length != block_group_rec->offset ||
7866                     chunk_rec->offset != block_group_rec->objectid ||
7867                     (!metadump_v2 &&
7868                      chunk_rec->type_flags != block_group_rec->flags)) {
7869                         if (!silent)
7870                                 fprintf(stderr,
7871                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7872                                         chunk_rec->objectid,
7873                                         chunk_rec->type,
7874                                         chunk_rec->offset,
7875                                         chunk_rec->length,
7876                                         chunk_rec->offset,
7877                                         chunk_rec->type_flags,
7878                                         block_group_rec->objectid,
7879                                         block_group_rec->type,
7880                                         block_group_rec->offset,
7881                                         block_group_rec->offset,
7882                                         block_group_rec->objectid,
7883                                         block_group_rec->flags);
7884                         ret = -1;
7885                 } else {
7886                         list_del_init(&block_group_rec->list);
7887                         chunk_rec->bg_rec = block_group_rec;
7888                 }
7889         } else {
7890                 if (!silent)
7891                         fprintf(stderr,
7892                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7893                                 chunk_rec->objectid,
7894                                 chunk_rec->type,
7895                                 chunk_rec->offset,
7896                                 chunk_rec->length,
7897                                 chunk_rec->offset,
7898                                 chunk_rec->type_flags);
7899                 ret = 1;
7900         }
7901
7902         if (metadump_v2)
7903                 return ret;
7904
7905         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7906                                     chunk_rec->num_stripes);
7907         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7908                 devid = chunk_rec->stripes[i].devid;
7909                 offset = chunk_rec->stripes[i].offset;
7910                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7911                                                        devid, offset, length);
7912                 if (dev_extent_item) {
7913                         dev_extent_rec = container_of(dev_extent_item,
7914                                                 struct device_extent_record,
7915                                                 cache);
7916                         if (dev_extent_rec->objectid != devid ||
7917                             dev_extent_rec->offset != offset ||
7918                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7919                             dev_extent_rec->length != length) {
7920                                 if (!silent)
7921                                         fprintf(stderr,
7922                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7923                                                 chunk_rec->objectid,
7924                                                 chunk_rec->type,
7925                                                 chunk_rec->offset,
7926                                                 chunk_rec->stripes[i].devid,
7927                                                 chunk_rec->stripes[i].offset,
7928                                                 dev_extent_rec->objectid,
7929                                                 dev_extent_rec->offset,
7930                                                 dev_extent_rec->length);
7931                                 ret = -1;
7932                         } else {
7933                                 list_move(&dev_extent_rec->chunk_list,
7934                                           &chunk_rec->dextents);
7935                         }
7936                 } else {
7937                         if (!silent)
7938                                 fprintf(stderr,
7939                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7940                                         chunk_rec->objectid,
7941                                         chunk_rec->type,
7942                                         chunk_rec->offset,
7943                                         chunk_rec->stripes[i].devid,
7944                                         chunk_rec->stripes[i].offset);
7945                         ret = -1;
7946                 }
7947         }
7948         return ret;
7949 }
7950
7951 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7952 int check_chunks(struct cache_tree *chunk_cache,
7953                  struct block_group_tree *block_group_cache,
7954                  struct device_extent_tree *dev_extent_cache,
7955                  struct list_head *good, struct list_head *bad,
7956                  struct list_head *rebuild, int silent)
7957 {
7958         struct cache_extent *chunk_item;
7959         struct chunk_record *chunk_rec;
7960         struct block_group_record *bg_rec;
7961         struct device_extent_record *dext_rec;
7962         int err;
7963         int ret = 0;
7964
7965         chunk_item = first_cache_extent(chunk_cache);
7966         while (chunk_item) {
7967                 chunk_rec = container_of(chunk_item, struct chunk_record,
7968                                          cache);
7969                 err = check_chunk_refs(chunk_rec, block_group_cache,
7970                                        dev_extent_cache, silent);
7971                 if (err < 0)
7972                         ret = err;
7973                 if (err == 0 && good)
7974                         list_add_tail(&chunk_rec->list, good);
7975                 if (err > 0 && rebuild)
7976                         list_add_tail(&chunk_rec->list, rebuild);
7977                 if (err < 0 && bad)
7978                         list_add_tail(&chunk_rec->list, bad);
7979                 chunk_item = next_cache_extent(chunk_item);
7980         }
7981
7982         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7983                 if (!silent)
7984                         fprintf(stderr,
7985                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7986                                 bg_rec->objectid,
7987                                 bg_rec->offset,
7988                                 bg_rec->flags);
7989                 if (!ret)
7990                         ret = 1;
7991         }
7992
7993         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7994                             chunk_list) {
7995                 if (!silent)
7996                         fprintf(stderr,
7997                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
7998                                 dext_rec->objectid,
7999                                 dext_rec->offset,
8000                                 dext_rec->length);
8001                 if (!ret)
8002                         ret = 1;
8003         }
8004         return ret;
8005 }
8006
8007
8008 static int check_device_used(struct device_record *dev_rec,
8009                              struct device_extent_tree *dext_cache)
8010 {
8011         struct cache_extent *cache;
8012         struct device_extent_record *dev_extent_rec;
8013         u64 total_byte = 0;
8014
8015         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8016         while (cache) {
8017                 dev_extent_rec = container_of(cache,
8018                                               struct device_extent_record,
8019                                               cache);
8020                 if (dev_extent_rec->objectid != dev_rec->devid)
8021                         break;
8022
8023                 list_del_init(&dev_extent_rec->device_list);
8024                 total_byte += dev_extent_rec->length;
8025                 cache = next_cache_extent(cache);
8026         }
8027
8028         if (total_byte != dev_rec->byte_used) {
8029                 fprintf(stderr,
8030                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8031                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8032                         dev_rec->type, dev_rec->offset);
8033                 return -1;
8034         } else {
8035                 return 0;
8036         }
8037 }
8038
8039 /* check btrfs_dev_item -> btrfs_dev_extent */
8040 static int check_devices(struct rb_root *dev_cache,
8041                          struct device_extent_tree *dev_extent_cache)
8042 {
8043         struct rb_node *dev_node;
8044         struct device_record *dev_rec;
8045         struct device_extent_record *dext_rec;
8046         int err;
8047         int ret = 0;
8048
8049         dev_node = rb_first(dev_cache);
8050         while (dev_node) {
8051                 dev_rec = container_of(dev_node, struct device_record, node);
8052                 err = check_device_used(dev_rec, dev_extent_cache);
8053                 if (err)
8054                         ret = err;
8055
8056                 dev_node = rb_next(dev_node);
8057         }
8058         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8059                             device_list) {
8060                 fprintf(stderr,
8061                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8062                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8063                 if (!ret)
8064                         ret = 1;
8065         }
8066         return ret;
8067 }
8068
8069 static int add_root_item_to_list(struct list_head *head,
8070                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8071                                   u8 level, u8 drop_level,
8072                                   int level_size, struct btrfs_key *drop_key)
8073 {
8074
8075         struct root_item_record *ri_rec;
8076         ri_rec = malloc(sizeof(*ri_rec));
8077         if (!ri_rec)
8078                 return -ENOMEM;
8079         ri_rec->bytenr = bytenr;
8080         ri_rec->objectid = objectid;
8081         ri_rec->level = level;
8082         ri_rec->level_size = level_size;
8083         ri_rec->drop_level = drop_level;
8084         ri_rec->last_snapshot = last_snapshot;
8085         if (drop_key)
8086                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8087         list_add_tail(&ri_rec->list, head);
8088
8089         return 0;
8090 }
8091
8092 static void free_root_item_list(struct list_head *list)
8093 {
8094         struct root_item_record *ri_rec;
8095
8096         while (!list_empty(list)) {
8097                 ri_rec = list_first_entry(list, struct root_item_record,
8098                                           list);
8099                 list_del_init(&ri_rec->list);
8100                 free(ri_rec);
8101         }
8102 }
8103
8104 static int deal_root_from_list(struct list_head *list,
8105                                struct btrfs_root *root,
8106                                struct block_info *bits,
8107                                int bits_nr,
8108                                struct cache_tree *pending,
8109                                struct cache_tree *seen,
8110                                struct cache_tree *reada,
8111                                struct cache_tree *nodes,
8112                                struct cache_tree *extent_cache,
8113                                struct cache_tree *chunk_cache,
8114                                struct rb_root *dev_cache,
8115                                struct block_group_tree *block_group_cache,
8116                                struct device_extent_tree *dev_extent_cache)
8117 {
8118         int ret = 0;
8119         u64 last;
8120
8121         while (!list_empty(list)) {
8122                 struct root_item_record *rec;
8123                 struct extent_buffer *buf;
8124                 rec = list_entry(list->next,
8125                                  struct root_item_record, list);
8126                 last = 0;
8127                 buf = read_tree_block(root->fs_info->tree_root,
8128                                       rec->bytenr, rec->level_size, 0);
8129                 if (!extent_buffer_uptodate(buf)) {
8130                         free_extent_buffer(buf);
8131                         ret = -EIO;
8132                         break;
8133                 }
8134                 add_root_to_pending(buf, extent_cache, pending,
8135                                     seen, nodes, rec->objectid);
8136                 /*
8137                  * To rebuild extent tree, we need deal with snapshot
8138                  * one by one, otherwise we deal with node firstly which
8139                  * can maximize readahead.
8140                  */
8141                 while (1) {
8142                         ret = run_next_block(root, bits, bits_nr, &last,
8143                                              pending, seen, reada, nodes,
8144                                              extent_cache, chunk_cache,
8145                                              dev_cache, block_group_cache,
8146                                              dev_extent_cache, rec);
8147                         if (ret != 0)
8148                                 break;
8149                 }
8150                 free_extent_buffer(buf);
8151                 list_del(&rec->list);
8152                 free(rec);
8153                 if (ret < 0)
8154                         break;
8155         }
8156         while (ret >= 0) {
8157                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8158                                      reada, nodes, extent_cache, chunk_cache,
8159                                      dev_cache, block_group_cache,
8160                                      dev_extent_cache, NULL);
8161                 if (ret != 0) {
8162                         if (ret > 0)
8163                                 ret = 0;
8164                         break;
8165                 }
8166         }
8167         return ret;
8168 }
8169
8170 static int check_chunks_and_extents(struct btrfs_root *root)
8171 {
8172         struct rb_root dev_cache;
8173         struct cache_tree chunk_cache;
8174         struct block_group_tree block_group_cache;
8175         struct device_extent_tree dev_extent_cache;
8176         struct cache_tree extent_cache;
8177         struct cache_tree seen;
8178         struct cache_tree pending;
8179         struct cache_tree reada;
8180         struct cache_tree nodes;
8181         struct extent_io_tree excluded_extents;
8182         struct cache_tree corrupt_blocks;
8183         struct btrfs_path path;
8184         struct btrfs_key key;
8185         struct btrfs_key found_key;
8186         int ret, err = 0;
8187         struct block_info *bits;
8188         int bits_nr;
8189         struct extent_buffer *leaf;
8190         int slot;
8191         struct btrfs_root_item ri;
8192         struct list_head dropping_trees;
8193         struct list_head normal_trees;
8194         struct btrfs_root *root1;
8195         u64 objectid;
8196         u32 level_size;
8197         u8 level;
8198
8199         dev_cache = RB_ROOT;
8200         cache_tree_init(&chunk_cache);
8201         block_group_tree_init(&block_group_cache);
8202         device_extent_tree_init(&dev_extent_cache);
8203
8204         cache_tree_init(&extent_cache);
8205         cache_tree_init(&seen);
8206         cache_tree_init(&pending);
8207         cache_tree_init(&nodes);
8208         cache_tree_init(&reada);
8209         cache_tree_init(&corrupt_blocks);
8210         extent_io_tree_init(&excluded_extents);
8211         INIT_LIST_HEAD(&dropping_trees);
8212         INIT_LIST_HEAD(&normal_trees);
8213
8214         if (repair) {
8215                 root->fs_info->excluded_extents = &excluded_extents;
8216                 root->fs_info->fsck_extent_cache = &extent_cache;
8217                 root->fs_info->free_extent_hook = free_extent_hook;
8218                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8219         }
8220
8221         bits_nr = 1024;
8222         bits = malloc(bits_nr * sizeof(struct block_info));
8223         if (!bits) {
8224                 perror("malloc");
8225                 exit(1);
8226         }
8227
8228         if (ctx.progress_enabled) {
8229                 ctx.tp = TASK_EXTENTS;
8230                 task_start(ctx.info);
8231         }
8232
8233 again:
8234         root1 = root->fs_info->tree_root;
8235         level = btrfs_header_level(root1->node);
8236         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8237                                     root1->node->start, 0, level, 0,
8238                                     root1->nodesize, NULL);
8239         if (ret < 0)
8240                 goto out;
8241         root1 = root->fs_info->chunk_root;
8242         level = btrfs_header_level(root1->node);
8243         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8244                                     root1->node->start, 0, level, 0,
8245                                     root1->nodesize, NULL);
8246         if (ret < 0)
8247                 goto out;
8248         btrfs_init_path(&path);
8249         key.offset = 0;
8250         key.objectid = 0;
8251         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8252         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8253                                         &key, &path, 0, 0);
8254         if (ret < 0)
8255                 goto out;
8256         while(1) {
8257                 leaf = path.nodes[0];
8258                 slot = path.slots[0];
8259                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8260                         ret = btrfs_next_leaf(root, &path);
8261                         if (ret != 0)
8262                                 break;
8263                         leaf = path.nodes[0];
8264                         slot = path.slots[0];
8265                 }
8266                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8267                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8268                         unsigned long offset;
8269                         u64 last_snapshot;
8270
8271                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8272                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8273                         last_snapshot = btrfs_root_last_snapshot(&ri);
8274                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8275                                 level = btrfs_root_level(&ri);
8276                                 level_size = root->nodesize;
8277                                 ret = add_root_item_to_list(&normal_trees,
8278                                                 found_key.objectid,
8279                                                 btrfs_root_bytenr(&ri),
8280                                                 last_snapshot, level,
8281                                                 0, level_size, NULL);
8282                                 if (ret < 0)
8283                                         goto out;
8284                         } else {
8285                                 level = btrfs_root_level(&ri);
8286                                 level_size = root->nodesize;
8287                                 objectid = found_key.objectid;
8288                                 btrfs_disk_key_to_cpu(&found_key,
8289                                                       &ri.drop_progress);
8290                                 ret = add_root_item_to_list(&dropping_trees,
8291                                                 objectid,
8292                                                 btrfs_root_bytenr(&ri),
8293                                                 last_snapshot, level,
8294                                                 ri.drop_level,
8295                                                 level_size, &found_key);
8296                                 if (ret < 0)
8297                                         goto out;
8298                         }
8299                 }
8300                 path.slots[0]++;
8301         }
8302         btrfs_release_path(&path);
8303
8304         /*
8305          * check_block can return -EAGAIN if it fixes something, please keep
8306          * this in mind when dealing with return values from these functions, if
8307          * we get -EAGAIN we want to fall through and restart the loop.
8308          */
8309         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8310                                   &seen, &reada, &nodes, &extent_cache,
8311                                   &chunk_cache, &dev_cache, &block_group_cache,
8312                                   &dev_extent_cache);
8313         if (ret < 0) {
8314                 if (ret == -EAGAIN)
8315                         goto loop;
8316                 goto out;
8317         }
8318         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8319                                   &pending, &seen, &reada, &nodes,
8320                                   &extent_cache, &chunk_cache, &dev_cache,
8321                                   &block_group_cache, &dev_extent_cache);
8322         if (ret < 0) {
8323                 if (ret == -EAGAIN)
8324                         goto loop;
8325                 goto out;
8326         }
8327
8328         ret = check_chunks(&chunk_cache, &block_group_cache,
8329                            &dev_extent_cache, NULL, NULL, NULL, 0);
8330         if (ret) {
8331                 if (ret == -EAGAIN)
8332                         goto loop;
8333                 err = ret;
8334         }
8335
8336         ret = check_extent_refs(root, &extent_cache);
8337         if (ret < 0) {
8338                 if (ret == -EAGAIN)
8339                         goto loop;
8340                 goto out;
8341         }
8342
8343         ret = check_devices(&dev_cache, &dev_extent_cache);
8344         if (ret && err)
8345                 ret = err;
8346
8347 out:
8348         task_stop(ctx.info);
8349         if (repair) {
8350                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8351                 extent_io_tree_cleanup(&excluded_extents);
8352                 root->fs_info->fsck_extent_cache = NULL;
8353                 root->fs_info->free_extent_hook = NULL;
8354                 root->fs_info->corrupt_blocks = NULL;
8355                 root->fs_info->excluded_extents = NULL;
8356         }
8357         free(bits);
8358         free_chunk_cache_tree(&chunk_cache);
8359         free_device_cache_tree(&dev_cache);
8360         free_block_group_tree(&block_group_cache);
8361         free_device_extent_tree(&dev_extent_cache);
8362         free_extent_cache_tree(&seen);
8363         free_extent_cache_tree(&pending);
8364         free_extent_cache_tree(&reada);
8365         free_extent_cache_tree(&nodes);
8366         return ret;
8367 loop:
8368         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8369         free_extent_cache_tree(&seen);
8370         free_extent_cache_tree(&pending);
8371         free_extent_cache_tree(&reada);
8372         free_extent_cache_tree(&nodes);
8373         free_chunk_cache_tree(&chunk_cache);
8374         free_block_group_tree(&block_group_cache);
8375         free_device_cache_tree(&dev_cache);
8376         free_device_extent_tree(&dev_extent_cache);
8377         free_extent_record_cache(root->fs_info, &extent_cache);
8378         free_root_item_list(&normal_trees);
8379         free_root_item_list(&dropping_trees);
8380         extent_io_tree_cleanup(&excluded_extents);
8381         goto again;
8382 }
8383
8384 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8385                            struct btrfs_root *root, int overwrite)
8386 {
8387         struct extent_buffer *c;
8388         struct extent_buffer *old = root->node;
8389         int level;
8390         int ret;
8391         struct btrfs_disk_key disk_key = {0,0,0};
8392
8393         level = 0;
8394
8395         if (overwrite) {
8396                 c = old;
8397                 extent_buffer_get(c);
8398                 goto init;
8399         }
8400         c = btrfs_alloc_free_block(trans, root,
8401                                    root->nodesize,
8402                                    root->root_key.objectid,
8403                                    &disk_key, level, 0, 0);
8404         if (IS_ERR(c)) {
8405                 c = old;
8406                 extent_buffer_get(c);
8407                 overwrite = 1;
8408         }
8409 init:
8410         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8411         btrfs_set_header_level(c, level);
8412         btrfs_set_header_bytenr(c, c->start);
8413         btrfs_set_header_generation(c, trans->transid);
8414         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8415         btrfs_set_header_owner(c, root->root_key.objectid);
8416
8417         write_extent_buffer(c, root->fs_info->fsid,
8418                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8419
8420         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8421                             btrfs_header_chunk_tree_uuid(c),
8422                             BTRFS_UUID_SIZE);
8423
8424         btrfs_mark_buffer_dirty(c);
8425         /*
8426          * this case can happen in the following case:
8427          *
8428          * 1.overwrite previous root.
8429          *
8430          * 2.reinit reloc data root, this is because we skip pin
8431          * down reloc data tree before which means we can allocate
8432          * same block bytenr here.
8433          */
8434         if (old->start == c->start) {
8435                 btrfs_set_root_generation(&root->root_item,
8436                                           trans->transid);
8437                 root->root_item.level = btrfs_header_level(root->node);
8438                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8439                                         &root->root_key, &root->root_item);
8440                 if (ret) {
8441                         free_extent_buffer(c);
8442                         return ret;
8443                 }
8444         }
8445         free_extent_buffer(old);
8446         root->node = c;
8447         add_root_to_dirty_list(root);
8448         return 0;
8449 }
8450
8451 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8452                                 struct extent_buffer *eb, int tree_root)
8453 {
8454         struct extent_buffer *tmp;
8455         struct btrfs_root_item *ri;
8456         struct btrfs_key key;
8457         u64 bytenr;
8458         u32 nodesize;
8459         int level = btrfs_header_level(eb);
8460         int nritems;
8461         int ret;
8462         int i;
8463
8464         /*
8465          * If we have pinned this block before, don't pin it again.
8466          * This can not only avoid forever loop with broken filesystem
8467          * but also give us some speedups.
8468          */
8469         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8470                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8471                 return 0;
8472
8473         btrfs_pin_extent(fs_info, eb->start, eb->len);
8474
8475         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8476         nritems = btrfs_header_nritems(eb);
8477         for (i = 0; i < nritems; i++) {
8478                 if (level == 0) {
8479                         btrfs_item_key_to_cpu(eb, &key, i);
8480                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8481                                 continue;
8482                         /* Skip the extent root and reloc roots */
8483                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8484                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8485                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8486                                 continue;
8487                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8488                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8489
8490                         /*
8491                          * If at any point we start needing the real root we
8492                          * will have to build a stump root for the root we are
8493                          * in, but for now this doesn't actually use the root so
8494                          * just pass in extent_root.
8495                          */
8496                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8497                                               nodesize, 0);
8498                         if (!extent_buffer_uptodate(tmp)) {
8499                                 fprintf(stderr, "Error reading root block\n");
8500                                 return -EIO;
8501                         }
8502                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8503                         free_extent_buffer(tmp);
8504                         if (ret)
8505                                 return ret;
8506                 } else {
8507                         bytenr = btrfs_node_blockptr(eb, i);
8508
8509                         /* If we aren't the tree root don't read the block */
8510                         if (level == 1 && !tree_root) {
8511                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8512                                 continue;
8513                         }
8514
8515                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8516                                               nodesize, 0);
8517                         if (!extent_buffer_uptodate(tmp)) {
8518                                 fprintf(stderr, "Error reading tree block\n");
8519                                 return -EIO;
8520                         }
8521                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8522                         free_extent_buffer(tmp);
8523                         if (ret)
8524                                 return ret;
8525                 }
8526         }
8527
8528         return 0;
8529 }
8530
8531 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8532 {
8533         int ret;
8534
8535         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8536         if (ret)
8537                 return ret;
8538
8539         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8540 }
8541
8542 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8543 {
8544         struct btrfs_block_group_cache *cache;
8545         struct btrfs_path *path;
8546         struct extent_buffer *leaf;
8547         struct btrfs_chunk *chunk;
8548         struct btrfs_key key;
8549         int ret;
8550         u64 start;
8551
8552         path = btrfs_alloc_path();
8553         if (!path)
8554                 return -ENOMEM;
8555
8556         key.objectid = 0;
8557         key.type = BTRFS_CHUNK_ITEM_KEY;
8558         key.offset = 0;
8559
8560         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8561         if (ret < 0) {
8562                 btrfs_free_path(path);
8563                 return ret;
8564         }
8565
8566         /*
8567          * We do this in case the block groups were screwed up and had alloc
8568          * bits that aren't actually set on the chunks.  This happens with
8569          * restored images every time and could happen in real life I guess.
8570          */
8571         fs_info->avail_data_alloc_bits = 0;
8572         fs_info->avail_metadata_alloc_bits = 0;
8573         fs_info->avail_system_alloc_bits = 0;
8574
8575         /* First we need to create the in-memory block groups */
8576         while (1) {
8577                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8578                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8579                         if (ret < 0) {
8580                                 btrfs_free_path(path);
8581                                 return ret;
8582                         }
8583                         if (ret) {
8584                                 ret = 0;
8585                                 break;
8586                         }
8587                 }
8588                 leaf = path->nodes[0];
8589                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8590                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8591                         path->slots[0]++;
8592                         continue;
8593                 }
8594
8595                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8596                                        struct btrfs_chunk);
8597                 btrfs_add_block_group(fs_info, 0,
8598                                       btrfs_chunk_type(leaf, chunk),
8599                                       key.objectid, key.offset,
8600                                       btrfs_chunk_length(leaf, chunk));
8601                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8602                                  key.offset + btrfs_chunk_length(leaf, chunk),
8603                                  GFP_NOFS);
8604                 path->slots[0]++;
8605         }
8606         start = 0;
8607         while (1) {
8608                 cache = btrfs_lookup_first_block_group(fs_info, start);
8609                 if (!cache)
8610                         break;
8611                 cache->cached = 1;
8612                 start = cache->key.objectid + cache->key.offset;
8613         }
8614
8615         btrfs_free_path(path);
8616         return 0;
8617 }
8618
8619 static int reset_balance(struct btrfs_trans_handle *trans,
8620                          struct btrfs_fs_info *fs_info)
8621 {
8622         struct btrfs_root *root = fs_info->tree_root;
8623         struct btrfs_path *path;
8624         struct extent_buffer *leaf;
8625         struct btrfs_key key;
8626         int del_slot, del_nr = 0;
8627         int ret;
8628         int found = 0;
8629
8630         path = btrfs_alloc_path();
8631         if (!path)
8632                 return -ENOMEM;
8633
8634         key.objectid = BTRFS_BALANCE_OBJECTID;
8635         key.type = BTRFS_BALANCE_ITEM_KEY;
8636         key.offset = 0;
8637
8638         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8639         if (ret) {
8640                 if (ret > 0)
8641                         ret = 0;
8642                 if (!ret)
8643                         goto reinit_data_reloc;
8644                 else
8645                         goto out;
8646         }
8647
8648         ret = btrfs_del_item(trans, root, path);
8649         if (ret)
8650                 goto out;
8651         btrfs_release_path(path);
8652
8653         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8654         key.type = BTRFS_ROOT_ITEM_KEY;
8655         key.offset = 0;
8656
8657         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8658         if (ret < 0)
8659                 goto out;
8660         while (1) {
8661                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8662                         if (!found)
8663                                 break;
8664
8665                         if (del_nr) {
8666                                 ret = btrfs_del_items(trans, root, path,
8667                                                       del_slot, del_nr);
8668                                 del_nr = 0;
8669                                 if (ret)
8670                                         goto out;
8671                         }
8672                         key.offset++;
8673                         btrfs_release_path(path);
8674
8675                         found = 0;
8676                         ret = btrfs_search_slot(trans, root, &key, path,
8677                                                 -1, 1);
8678                         if (ret < 0)
8679                                 goto out;
8680                         continue;
8681                 }
8682                 found = 1;
8683                 leaf = path->nodes[0];
8684                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8685                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8686                         break;
8687                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8688                         path->slots[0]++;
8689                         continue;
8690                 }
8691                 if (!del_nr) {
8692                         del_slot = path->slots[0];
8693                         del_nr = 1;
8694                 } else {
8695                         del_nr++;
8696                 }
8697                 path->slots[0]++;
8698         }
8699
8700         if (del_nr) {
8701                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8702                 if (ret)
8703                         goto out;
8704         }
8705         btrfs_release_path(path);
8706
8707 reinit_data_reloc:
8708         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8709         key.type = BTRFS_ROOT_ITEM_KEY;
8710         key.offset = (u64)-1;
8711         root = btrfs_read_fs_root(fs_info, &key);
8712         if (IS_ERR(root)) {
8713                 fprintf(stderr, "Error reading data reloc tree\n");
8714                 ret = PTR_ERR(root);
8715                 goto out;
8716         }
8717         record_root_in_trans(trans, root);
8718         ret = btrfs_fsck_reinit_root(trans, root, 0);
8719         if (ret)
8720                 goto out;
8721         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8722 out:
8723         btrfs_free_path(path);
8724         return ret;
8725 }
8726
8727 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8728                               struct btrfs_fs_info *fs_info)
8729 {
8730         u64 start = 0;
8731         int ret;
8732
8733         /*
8734          * The only reason we don't do this is because right now we're just
8735          * walking the trees we find and pinning down their bytes, we don't look
8736          * at any of the leaves.  In order to do mixed groups we'd have to check
8737          * the leaves of any fs roots and pin down the bytes for any file
8738          * extents we find.  Not hard but why do it if we don't have to?
8739          */
8740         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8741                 fprintf(stderr, "We don't support re-initing the extent tree "
8742                         "for mixed block groups yet, please notify a btrfs "
8743                         "developer you want to do this so they can add this "
8744                         "functionality.\n");
8745                 return -EINVAL;
8746         }
8747
8748         /*
8749          * first we need to walk all of the trees except the extent tree and pin
8750          * down the bytes that are in use so we don't overwrite any existing
8751          * metadata.
8752          */
8753         ret = pin_metadata_blocks(fs_info);
8754         if (ret) {
8755                 fprintf(stderr, "error pinning down used bytes\n");
8756                 return ret;
8757         }
8758
8759         /*
8760          * Need to drop all the block groups since we're going to recreate all
8761          * of them again.
8762          */
8763         btrfs_free_block_groups(fs_info);
8764         ret = reset_block_groups(fs_info);
8765         if (ret) {
8766                 fprintf(stderr, "error resetting the block groups\n");
8767                 return ret;
8768         }
8769
8770         /* Ok we can allocate now, reinit the extent root */
8771         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8772         if (ret) {
8773                 fprintf(stderr, "extent root initialization failed\n");
8774                 /*
8775                  * When the transaction code is updated we should end the
8776                  * transaction, but for now progs only knows about commit so
8777                  * just return an error.
8778                  */
8779                 return ret;
8780         }
8781
8782         /*
8783          * Now we have all the in-memory block groups setup so we can make
8784          * allocations properly, and the metadata we care about is safe since we
8785          * pinned all of it above.
8786          */
8787         while (1) {
8788                 struct btrfs_block_group_cache *cache;
8789
8790                 cache = btrfs_lookup_first_block_group(fs_info, start);
8791                 if (!cache)
8792                         break;
8793                 start = cache->key.objectid + cache->key.offset;
8794                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8795                                         &cache->key, &cache->item,
8796                                         sizeof(cache->item));
8797                 if (ret) {
8798                         fprintf(stderr, "Error adding block group\n");
8799                         return ret;
8800                 }
8801                 btrfs_extent_post_op(trans, fs_info->extent_root);
8802         }
8803
8804         ret = reset_balance(trans, fs_info);
8805         if (ret)
8806                 fprintf(stderr, "error reseting the pending balance\n");
8807
8808         return ret;
8809 }
8810
8811 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8812 {
8813         struct btrfs_path *path;
8814         struct btrfs_trans_handle *trans;
8815         struct btrfs_key key;
8816         int ret;
8817
8818         printf("Recowing metadata block %llu\n", eb->start);
8819         key.objectid = btrfs_header_owner(eb);
8820         key.type = BTRFS_ROOT_ITEM_KEY;
8821         key.offset = (u64)-1;
8822
8823         root = btrfs_read_fs_root(root->fs_info, &key);
8824         if (IS_ERR(root)) {
8825                 fprintf(stderr, "Couldn't find owner root %llu\n",
8826                         key.objectid);
8827                 return PTR_ERR(root);
8828         }
8829
8830         path = btrfs_alloc_path();
8831         if (!path)
8832                 return -ENOMEM;
8833
8834         trans = btrfs_start_transaction(root, 1);
8835         if (IS_ERR(trans)) {
8836                 btrfs_free_path(path);
8837                 return PTR_ERR(trans);
8838         }
8839
8840         path->lowest_level = btrfs_header_level(eb);
8841         if (path->lowest_level)
8842                 btrfs_node_key_to_cpu(eb, &key, 0);
8843         else
8844                 btrfs_item_key_to_cpu(eb, &key, 0);
8845
8846         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8847         btrfs_commit_transaction(trans, root);
8848         btrfs_free_path(path);
8849         return ret;
8850 }
8851
8852 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8853 {
8854         struct btrfs_path *path;
8855         struct btrfs_trans_handle *trans;
8856         struct btrfs_key key;
8857         int ret;
8858
8859         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8860                bad->key.type, bad->key.offset);
8861         key.objectid = bad->root_id;
8862         key.type = BTRFS_ROOT_ITEM_KEY;
8863         key.offset = (u64)-1;
8864
8865         root = btrfs_read_fs_root(root->fs_info, &key);
8866         if (IS_ERR(root)) {
8867                 fprintf(stderr, "Couldn't find owner root %llu\n",
8868                         key.objectid);
8869                 return PTR_ERR(root);
8870         }
8871
8872         path = btrfs_alloc_path();
8873         if (!path)
8874                 return -ENOMEM;
8875
8876         trans = btrfs_start_transaction(root, 1);
8877         if (IS_ERR(trans)) {
8878                 btrfs_free_path(path);
8879                 return PTR_ERR(trans);
8880         }
8881
8882         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8883         if (ret) {
8884                 if (ret > 0)
8885                         ret = 0;
8886                 goto out;
8887         }
8888         ret = btrfs_del_item(trans, root, path);
8889 out:
8890         btrfs_commit_transaction(trans, root);
8891         btrfs_free_path(path);
8892         return ret;
8893 }
8894
8895 static int zero_log_tree(struct btrfs_root *root)
8896 {
8897         struct btrfs_trans_handle *trans;
8898         int ret;
8899
8900         trans = btrfs_start_transaction(root, 1);
8901         if (IS_ERR(trans)) {
8902                 ret = PTR_ERR(trans);
8903                 return ret;
8904         }
8905         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8906         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8907         ret = btrfs_commit_transaction(trans, root);
8908         return ret;
8909 }
8910
8911 static int populate_csum(struct btrfs_trans_handle *trans,
8912                          struct btrfs_root *csum_root, char *buf, u64 start,
8913                          u64 len)
8914 {
8915         u64 offset = 0;
8916         u64 sectorsize;
8917         int ret = 0;
8918
8919         while (offset < len) {
8920                 sectorsize = csum_root->sectorsize;
8921                 ret = read_extent_data(csum_root, buf, start + offset,
8922                                        &sectorsize, 0);
8923                 if (ret)
8924                         break;
8925                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8926                                             start + offset, buf, sectorsize);
8927                 if (ret)
8928                         break;
8929                 offset += sectorsize;
8930         }
8931         return ret;
8932 }
8933
8934 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8935                                       struct btrfs_root *csum_root,
8936                                       struct btrfs_root *cur_root)
8937 {
8938         struct btrfs_path *path;
8939         struct btrfs_key key;
8940         struct extent_buffer *node;
8941         struct btrfs_file_extent_item *fi;
8942         char *buf = NULL;
8943         u64 start = 0;
8944         u64 len = 0;
8945         int slot = 0;
8946         int ret = 0;
8947
8948         path = btrfs_alloc_path();
8949         if (!path)
8950                 return -ENOMEM;
8951         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8952         if (!buf) {
8953                 ret = -ENOMEM;
8954                 goto out;
8955         }
8956
8957         key.objectid = 0;
8958         key.offset = 0;
8959         key.type = 0;
8960
8961         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8962         if (ret < 0)
8963                 goto out;
8964         /* Iterate all regular file extents and fill its csum */
8965         while (1) {
8966                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8967
8968                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8969                         goto next;
8970                 node = path->nodes[0];
8971                 slot = path->slots[0];
8972                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8973                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8974                         goto next;
8975                 start = btrfs_file_extent_disk_bytenr(node, fi);
8976                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8977
8978                 ret = populate_csum(trans, csum_root, buf, start, len);
8979                 if (ret == -EEXIST)
8980                         ret = 0;
8981                 if (ret < 0)
8982                         goto out;
8983 next:
8984                 /*
8985                  * TODO: if next leaf is corrupted, jump to nearest next valid
8986                  * leaf.
8987                  */
8988                 ret = btrfs_next_item(cur_root, path);
8989                 if (ret < 0)
8990                         goto out;
8991                 if (ret > 0) {
8992                         ret = 0;
8993                         goto out;
8994                 }
8995         }
8996
8997 out:
8998         btrfs_free_path(path);
8999         free(buf);
9000         return ret;
9001 }
9002
9003 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9004                                   struct btrfs_root *csum_root)
9005 {
9006         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9007         struct btrfs_path *path;
9008         struct btrfs_root *tree_root = fs_info->tree_root;
9009         struct btrfs_root *cur_root;
9010         struct extent_buffer *node;
9011         struct btrfs_key key;
9012         int slot = 0;
9013         int ret = 0;
9014
9015         path = btrfs_alloc_path();
9016         if (!path)
9017                 return -ENOMEM;
9018
9019         key.objectid = BTRFS_FS_TREE_OBJECTID;
9020         key.offset = 0;
9021         key.type = BTRFS_ROOT_ITEM_KEY;
9022
9023         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9024         if (ret < 0)
9025                 goto out;
9026         if (ret > 0) {
9027                 ret = -ENOENT;
9028                 goto out;
9029         }
9030
9031         while (1) {
9032                 node = path->nodes[0];
9033                 slot = path->slots[0];
9034                 btrfs_item_key_to_cpu(node, &key, slot);
9035                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9036                         goto out;
9037                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9038                         goto next;
9039                 if (!is_fstree(key.objectid))
9040                         goto next;
9041                 key.offset = (u64)-1;
9042
9043                 cur_root = btrfs_read_fs_root(fs_info, &key);
9044                 if (IS_ERR(cur_root) || !cur_root) {
9045                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9046                                 key.objectid);
9047                         goto out;
9048                 }
9049                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9050                                 cur_root);
9051                 if (ret < 0)
9052                         goto out;
9053 next:
9054                 ret = btrfs_next_item(tree_root, path);
9055                 if (ret > 0) {
9056                         ret = 0;
9057                         goto out;
9058                 }
9059                 if (ret < 0)
9060                         goto out;
9061         }
9062
9063 out:
9064         btrfs_free_path(path);
9065         return ret;
9066 }
9067
9068 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9069                                       struct btrfs_root *csum_root)
9070 {
9071         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9072         struct btrfs_path *path;
9073         struct btrfs_extent_item *ei;
9074         struct extent_buffer *leaf;
9075         char *buf;
9076         struct btrfs_key key;
9077         int ret;
9078
9079         path = btrfs_alloc_path();
9080         if (!path)
9081                 return -ENOMEM;
9082
9083         key.objectid = 0;
9084         key.type = BTRFS_EXTENT_ITEM_KEY;
9085         key.offset = 0;
9086
9087         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9088         if (ret < 0) {
9089                 btrfs_free_path(path);
9090                 return ret;
9091         }
9092
9093         buf = malloc(csum_root->sectorsize);
9094         if (!buf) {
9095                 btrfs_free_path(path);
9096                 return -ENOMEM;
9097         }
9098
9099         while (1) {
9100                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9101                         ret = btrfs_next_leaf(extent_root, path);
9102                         if (ret < 0)
9103                                 break;
9104                         if (ret) {
9105                                 ret = 0;
9106                                 break;
9107                         }
9108                 }
9109                 leaf = path->nodes[0];
9110
9111                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9112                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9113                         path->slots[0]++;
9114                         continue;
9115                 }
9116
9117                 ei = btrfs_item_ptr(leaf, path->slots[0],
9118                                     struct btrfs_extent_item);
9119                 if (!(btrfs_extent_flags(leaf, ei) &
9120                       BTRFS_EXTENT_FLAG_DATA)) {
9121                         path->slots[0]++;
9122                         continue;
9123                 }
9124
9125                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9126                                     key.offset);
9127                 if (ret)
9128                         break;
9129                 path->slots[0]++;
9130         }
9131
9132         btrfs_free_path(path);
9133         free(buf);
9134         return ret;
9135 }
9136
9137 /*
9138  * Recalculate the csum and put it into the csum tree.
9139  *
9140  * Extent tree init will wipe out all the extent info, so in that case, we
9141  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9142  * will use fs/subvol trees to init the csum tree.
9143  */
9144 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9145                           struct btrfs_root *csum_root,
9146                           int search_fs_tree)
9147 {
9148         if (search_fs_tree)
9149                 return fill_csum_tree_from_fs(trans, csum_root);
9150         else
9151                 return fill_csum_tree_from_extent(trans, csum_root);
9152 }
9153
9154 static void free_roots_info_cache(void)
9155 {
9156         if (!roots_info_cache)
9157                 return;
9158
9159         while (!cache_tree_empty(roots_info_cache)) {
9160                 struct cache_extent *entry;
9161                 struct root_item_info *rii;
9162
9163                 entry = first_cache_extent(roots_info_cache);
9164                 if (!entry)
9165                         break;
9166                 remove_cache_extent(roots_info_cache, entry);
9167                 rii = container_of(entry, struct root_item_info, cache_extent);
9168                 free(rii);
9169         }
9170
9171         free(roots_info_cache);
9172         roots_info_cache = NULL;
9173 }
9174
9175 static int build_roots_info_cache(struct btrfs_fs_info *info)
9176 {
9177         int ret = 0;
9178         struct btrfs_key key;
9179         struct extent_buffer *leaf;
9180         struct btrfs_path *path;
9181
9182         if (!roots_info_cache) {
9183                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9184                 if (!roots_info_cache)
9185                         return -ENOMEM;
9186                 cache_tree_init(roots_info_cache);
9187         }
9188
9189         path = btrfs_alloc_path();
9190         if (!path)
9191                 return -ENOMEM;
9192
9193         key.objectid = 0;
9194         key.type = BTRFS_EXTENT_ITEM_KEY;
9195         key.offset = 0;
9196
9197         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9198         if (ret < 0)
9199                 goto out;
9200         leaf = path->nodes[0];
9201
9202         while (1) {
9203                 struct btrfs_key found_key;
9204                 struct btrfs_extent_item *ei;
9205                 struct btrfs_extent_inline_ref *iref;
9206                 int slot = path->slots[0];
9207                 int type;
9208                 u64 flags;
9209                 u64 root_id;
9210                 u8 level;
9211                 struct cache_extent *entry;
9212                 struct root_item_info *rii;
9213
9214                 if (slot >= btrfs_header_nritems(leaf)) {
9215                         ret = btrfs_next_leaf(info->extent_root, path);
9216                         if (ret < 0) {
9217                                 break;
9218                         } else if (ret) {
9219                                 ret = 0;
9220                                 break;
9221                         }
9222                         leaf = path->nodes[0];
9223                         slot = path->slots[0];
9224                 }
9225
9226                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9227
9228                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9229                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9230                         goto next;
9231
9232                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9233                 flags = btrfs_extent_flags(leaf, ei);
9234
9235                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9236                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9237                         goto next;
9238
9239                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9240                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9241                         level = found_key.offset;
9242                 } else {
9243                         struct btrfs_tree_block_info *binfo;
9244
9245                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9246                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9247                         level = btrfs_tree_block_level(leaf, binfo);
9248                 }
9249
9250                 /*
9251                  * For a root extent, it must be of the following type and the
9252                  * first (and only one) iref in the item.
9253                  */
9254                 type = btrfs_extent_inline_ref_type(leaf, iref);
9255                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9256                         goto next;
9257
9258                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9259                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9260                 if (!entry) {
9261                         rii = malloc(sizeof(struct root_item_info));
9262                         if (!rii) {
9263                                 ret = -ENOMEM;
9264                                 goto out;
9265                         }
9266                         rii->cache_extent.start = root_id;
9267                         rii->cache_extent.size = 1;
9268                         rii->level = (u8)-1;
9269                         entry = &rii->cache_extent;
9270                         ret = insert_cache_extent(roots_info_cache, entry);
9271                         ASSERT(ret == 0);
9272                 } else {
9273                         rii = container_of(entry, struct root_item_info,
9274                                            cache_extent);
9275                 }
9276
9277                 ASSERT(rii->cache_extent.start == root_id);
9278                 ASSERT(rii->cache_extent.size == 1);
9279
9280                 if (level > rii->level || rii->level == (u8)-1) {
9281                         rii->level = level;
9282                         rii->bytenr = found_key.objectid;
9283                         rii->gen = btrfs_extent_generation(leaf, ei);
9284                         rii->node_count = 1;
9285                 } else if (level == rii->level) {
9286                         rii->node_count++;
9287                 }
9288 next:
9289                 path->slots[0]++;
9290         }
9291
9292 out:
9293         btrfs_free_path(path);
9294
9295         return ret;
9296 }
9297
9298 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9299                                   struct btrfs_path *path,
9300                                   const struct btrfs_key *root_key,
9301                                   const int read_only_mode)
9302 {
9303         const u64 root_id = root_key->objectid;
9304         struct cache_extent *entry;
9305         struct root_item_info *rii;
9306         struct btrfs_root_item ri;
9307         unsigned long offset;
9308
9309         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9310         if (!entry) {
9311                 fprintf(stderr,
9312                         "Error: could not find extent items for root %llu\n",
9313                         root_key->objectid);
9314                 return -ENOENT;
9315         }
9316
9317         rii = container_of(entry, struct root_item_info, cache_extent);
9318         ASSERT(rii->cache_extent.start == root_id);
9319         ASSERT(rii->cache_extent.size == 1);
9320
9321         if (rii->node_count != 1) {
9322                 fprintf(stderr,
9323                         "Error: could not find btree root extent for root %llu\n",
9324                         root_id);
9325                 return -ENOENT;
9326         }
9327
9328         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9329         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9330
9331         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9332             btrfs_root_level(&ri) != rii->level ||
9333             btrfs_root_generation(&ri) != rii->gen) {
9334
9335                 /*
9336                  * If we're in repair mode but our caller told us to not update
9337                  * the root item, i.e. just check if it needs to be updated, don't
9338                  * print this message, since the caller will call us again shortly
9339                  * for the same root item without read only mode (the caller will
9340                  * open a transaction first).
9341                  */
9342                 if (!(read_only_mode && repair))
9343                         fprintf(stderr,
9344                                 "%sroot item for root %llu,"
9345                                 " current bytenr %llu, current gen %llu, current level %u,"
9346                                 " new bytenr %llu, new gen %llu, new level %u\n",
9347                                 (read_only_mode ? "" : "fixing "),
9348                                 root_id,
9349                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9350                                 btrfs_root_level(&ri),
9351                                 rii->bytenr, rii->gen, rii->level);
9352
9353                 if (btrfs_root_generation(&ri) > rii->gen) {
9354                         fprintf(stderr,
9355                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9356                                 root_id, btrfs_root_generation(&ri), rii->gen);
9357                         return -EINVAL;
9358                 }
9359
9360                 if (!read_only_mode) {
9361                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9362                         btrfs_set_root_level(&ri, rii->level);
9363                         btrfs_set_root_generation(&ri, rii->gen);
9364                         write_extent_buffer(path->nodes[0], &ri,
9365                                             offset, sizeof(ri));
9366                 }
9367
9368                 return 1;
9369         }
9370
9371         return 0;
9372 }
9373
9374 /*
9375  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9376  * caused read-only snapshots to be corrupted if they were created at a moment
9377  * when the source subvolume/snapshot had orphan items. The issue was that the
9378  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9379  * node instead of the post orphan cleanup root node.
9380  * So this function, and its callees, just detects and fixes those cases. Even
9381  * though the regression was for read-only snapshots, this function applies to
9382  * any snapshot/subvolume root.
9383  * This must be run before any other repair code - not doing it so, makes other
9384  * repair code delete or modify backrefs in the extent tree for example, which
9385  * will result in an inconsistent fs after repairing the root items.
9386  */
9387 static int repair_root_items(struct btrfs_fs_info *info)
9388 {
9389         struct btrfs_path *path = NULL;
9390         struct btrfs_key key;
9391         struct extent_buffer *leaf;
9392         struct btrfs_trans_handle *trans = NULL;
9393         int ret = 0;
9394         int bad_roots = 0;
9395         int need_trans = 0;
9396
9397         ret = build_roots_info_cache(info);
9398         if (ret)
9399                 goto out;
9400
9401         path = btrfs_alloc_path();
9402         if (!path) {
9403                 ret = -ENOMEM;
9404                 goto out;
9405         }
9406
9407         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9408         key.type = BTRFS_ROOT_ITEM_KEY;
9409         key.offset = 0;
9410
9411 again:
9412         /*
9413          * Avoid opening and committing transactions if a leaf doesn't have
9414          * any root items that need to be fixed, so that we avoid rotating
9415          * backup roots unnecessarily.
9416          */
9417         if (need_trans) {
9418                 trans = btrfs_start_transaction(info->tree_root, 1);
9419                 if (IS_ERR(trans)) {
9420                         ret = PTR_ERR(trans);
9421                         goto out;
9422                 }
9423         }
9424
9425         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9426                                 0, trans ? 1 : 0);
9427         if (ret < 0)
9428                 goto out;
9429         leaf = path->nodes[0];
9430
9431         while (1) {
9432                 struct btrfs_key found_key;
9433
9434                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9435                         int no_more_keys = find_next_key(path, &key);
9436
9437                         btrfs_release_path(path);
9438                         if (trans) {
9439                                 ret = btrfs_commit_transaction(trans,
9440                                                                info->tree_root);
9441                                 trans = NULL;
9442                                 if (ret < 0)
9443                                         goto out;
9444                         }
9445                         need_trans = 0;
9446                         if (no_more_keys)
9447                                 break;
9448                         goto again;
9449                 }
9450
9451                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9452
9453                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9454                         goto next;
9455                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9456                         goto next;
9457
9458                 ret = maybe_repair_root_item(info, path, &found_key,
9459                                              trans ? 0 : 1);
9460                 if (ret < 0)
9461                         goto out;
9462                 if (ret) {
9463                         if (!trans && repair) {
9464                                 need_trans = 1;
9465                                 key = found_key;
9466                                 btrfs_release_path(path);
9467                                 goto again;
9468                         }
9469                         bad_roots++;
9470                 }
9471 next:
9472                 path->slots[0]++;
9473         }
9474         ret = 0;
9475 out:
9476         free_roots_info_cache();
9477         btrfs_free_path(path);
9478         if (trans)
9479                 btrfs_commit_transaction(trans, info->tree_root);
9480         if (ret < 0)
9481                 return ret;
9482
9483         return bad_roots;
9484 }
9485
9486 const char * const cmd_check_usage[] = {
9487         "btrfs check [options] <device>",
9488         "Check structural inegrity of a filesystem (unmounted).",
9489         "Check structural inegrity of an unmounted filesystem. Verify internal",
9490         "trees' consistency and item connectivity. In the repair mode try to",
9491         "fix the problems found.",
9492         "WARNING: the repair mode is considered dangerous",
9493         "",
9494         "-s|--super <superblock>     use this superblock copy",
9495         "-b|--backup                 use the first valid backup root copy",
9496         "--repair                    try to repair the filesystem",
9497         "--readonly                  run in read-only mode (default)",
9498         "--init-csum-tree            create a new CRC tree",
9499         "--init-extent-tree          create a new extent tree",
9500         "--check-data-csum           verify checkums of data blocks",
9501         "-Q|--qgroup-report           print a report on qgroup consistency",
9502         "-E|--subvol-extents <subvolid>",
9503         "                            print subvolume extents and sharing state",
9504         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9505         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9506         "-p|--progress               indicate progress",
9507         NULL
9508 };
9509
9510 int cmd_check(int argc, char **argv)
9511 {
9512         struct cache_tree root_cache;
9513         struct btrfs_root *root;
9514         struct btrfs_fs_info *info;
9515         u64 bytenr = 0;
9516         u64 subvolid = 0;
9517         u64 tree_root_bytenr = 0;
9518         u64 chunk_root_bytenr = 0;
9519         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9520         int ret;
9521         u64 num;
9522         int init_csum_tree = 0;
9523         int readonly = 0;
9524         int qgroup_report = 0;
9525         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9526
9527         while(1) {
9528                 int c;
9529                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9530                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9531                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9532                 static const struct option long_options[] = {
9533                         { "super", required_argument, NULL, 's' },
9534                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9535                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9536                         { "init-csum-tree", no_argument, NULL,
9537                                 GETOPT_VAL_INIT_CSUM },
9538                         { "init-extent-tree", no_argument, NULL,
9539                                 GETOPT_VAL_INIT_EXTENT },
9540                         { "check-data-csum", no_argument, NULL,
9541                                 GETOPT_VAL_CHECK_CSUM },
9542                         { "backup", no_argument, NULL, 'b' },
9543                         { "subvol-extents", required_argument, NULL, 'E' },
9544                         { "qgroup-report", no_argument, NULL, 'Q' },
9545                         { "tree-root", required_argument, NULL, 'r' },
9546                         { "chunk-root", required_argument, NULL,
9547                                 GETOPT_VAL_CHUNK_TREE },
9548                         { "progress", no_argument, NULL, 'p' },
9549                         { NULL, 0, NULL, 0}
9550                 };
9551
9552                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9553                 if (c < 0)
9554                         break;
9555                 switch(c) {
9556                         case 'a': /* ignored */ break;
9557                         case 'b':
9558                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9559                                 break;
9560                         case 's':
9561                                 num = arg_strtou64(optarg);
9562                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9563                                         fprintf(stderr,
9564                                                 "ERROR: super mirror should be less than: %d\n",
9565                                                 BTRFS_SUPER_MIRROR_MAX);
9566                                         exit(1);
9567                                 }
9568                                 bytenr = btrfs_sb_offset(((int)num));
9569                                 printf("using SB copy %llu, bytenr %llu\n", num,
9570                                        (unsigned long long)bytenr);
9571                                 break;
9572                         case 'Q':
9573                                 qgroup_report = 1;
9574                                 break;
9575                         case 'E':
9576                                 subvolid = arg_strtou64(optarg);
9577                                 break;
9578                         case 'r':
9579                                 tree_root_bytenr = arg_strtou64(optarg);
9580                                 break;
9581                         case GETOPT_VAL_CHUNK_TREE:
9582                                 chunk_root_bytenr = arg_strtou64(optarg);
9583                                 break;
9584                         case 'p':
9585                                 ctx.progress_enabled = true;
9586                                 break;
9587                         case '?':
9588                         case 'h':
9589                                 usage(cmd_check_usage);
9590                         case GETOPT_VAL_REPAIR:
9591                                 printf("enabling repair mode\n");
9592                                 repair = 1;
9593                                 ctree_flags |= OPEN_CTREE_WRITES;
9594                                 break;
9595                         case GETOPT_VAL_READONLY:
9596                                 readonly = 1;
9597                                 break;
9598                         case GETOPT_VAL_INIT_CSUM:
9599                                 printf("Creating a new CRC tree\n");
9600                                 init_csum_tree = 1;
9601                                 repair = 1;
9602                                 ctree_flags |= OPEN_CTREE_WRITES;
9603                                 break;
9604                         case GETOPT_VAL_INIT_EXTENT:
9605                                 init_extent_tree = 1;
9606                                 ctree_flags |= (OPEN_CTREE_WRITES |
9607                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9608                                 repair = 1;
9609                                 break;
9610                         case GETOPT_VAL_CHECK_CSUM:
9611                                 check_data_csum = 1;
9612                                 break;
9613                 }
9614         }
9615
9616         if (check_argc_exact(argc - optind, 1))
9617                 usage(cmd_check_usage);
9618
9619         if (ctx.progress_enabled) {
9620                 ctx.tp = TASK_NOTHING;
9621                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9622         }
9623
9624         /* This check is the only reason for --readonly to exist */
9625         if (readonly && repair) {
9626                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9627                 exit(1);
9628         }
9629
9630         radix_tree_init();
9631         cache_tree_init(&root_cache);
9632
9633         if((ret = check_mounted(argv[optind])) < 0) {
9634                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9635                 goto err_out;
9636         } else if(ret) {
9637                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9638                 ret = -EBUSY;
9639                 goto err_out;
9640         }
9641
9642         /* only allow partial opening under repair mode */
9643         if (repair)
9644                 ctree_flags |= OPEN_CTREE_PARTIAL;
9645
9646         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9647                                   chunk_root_bytenr, ctree_flags);
9648         if (!info) {
9649                 fprintf(stderr, "Couldn't open file system\n");
9650                 ret = -EIO;
9651                 goto err_out;
9652         }
9653
9654         global_info = info;
9655         root = info->fs_root;
9656
9657         /*
9658          * repair mode will force us to commit transaction which
9659          * will make us fail to load log tree when mounting.
9660          */
9661         if (repair && btrfs_super_log_root(info->super_copy)) {
9662                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9663                 if (!ret) {
9664                         ret = 1;
9665                         goto close_out;
9666                 }
9667                 ret = zero_log_tree(root);
9668                 if (ret) {
9669                         fprintf(stderr, "fail to zero log tree\n");
9670                         goto close_out;
9671                 }
9672         }
9673
9674         uuid_unparse(info->super_copy->fsid, uuidbuf);
9675         if (qgroup_report) {
9676                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9677                        uuidbuf);
9678                 ret = qgroup_verify_all(info);
9679                 if (ret == 0)
9680                         ret = report_qgroups(1);
9681                 goto close_out;
9682         }
9683         if (subvolid) {
9684                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9685                        subvolid, argv[optind], uuidbuf);
9686                 ret = print_extent_state(info, subvolid);
9687                 goto close_out;
9688         }
9689         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9690
9691         if (!extent_buffer_uptodate(info->tree_root->node) ||
9692             !extent_buffer_uptodate(info->dev_root->node) ||
9693             !extent_buffer_uptodate(info->chunk_root->node)) {
9694                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9695                 ret = -EIO;
9696                 goto close_out;
9697         }
9698
9699         if (init_extent_tree || init_csum_tree) {
9700                 struct btrfs_trans_handle *trans;
9701
9702                 trans = btrfs_start_transaction(info->extent_root, 0);
9703                 if (IS_ERR(trans)) {
9704                         fprintf(stderr, "Error starting transaction\n");
9705                         ret = PTR_ERR(trans);
9706                         goto close_out;
9707                 }
9708
9709                 if (init_extent_tree) {
9710                         printf("Creating a new extent tree\n");
9711                         ret = reinit_extent_tree(trans, info);
9712                         if (ret)
9713                                 goto close_out;
9714                 }
9715
9716                 if (init_csum_tree) {
9717                         fprintf(stderr, "Reinit crc root\n");
9718                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9719                         if (ret) {
9720                                 fprintf(stderr, "crc root initialization failed\n");
9721                                 ret = -EIO;
9722                                 goto close_out;
9723                         }
9724
9725                         ret = fill_csum_tree(trans, info->csum_root,
9726                                              init_extent_tree);
9727                         if (ret) {
9728                                 fprintf(stderr, "crc refilling failed\n");
9729                                 return -EIO;
9730                         }
9731                 }
9732                 /*
9733                  * Ok now we commit and run the normal fsck, which will add
9734                  * extent entries for all of the items it finds.
9735                  */
9736                 ret = btrfs_commit_transaction(trans, info->extent_root);
9737                 if (ret)
9738                         goto close_out;
9739         }
9740         if (!extent_buffer_uptodate(info->extent_root->node)) {
9741                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9742                 ret = -EIO;
9743                 goto close_out;
9744         }
9745         if (!extent_buffer_uptodate(info->csum_root->node)) {
9746                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9747                 ret = -EIO;
9748                 goto close_out;
9749         }
9750
9751         if (!ctx.progress_enabled)
9752                 fprintf(stderr, "checking extents\n");
9753         ret = check_chunks_and_extents(root);
9754         if (ret)
9755                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9756
9757         ret = repair_root_items(info);
9758         if (ret < 0)
9759                 goto close_out;
9760         if (repair) {
9761                 fprintf(stderr, "Fixed %d roots.\n", ret);
9762                 ret = 0;
9763         } else if (ret > 0) {
9764                 fprintf(stderr,
9765                        "Found %d roots with an outdated root item.\n",
9766                        ret);
9767                 fprintf(stderr,
9768                         "Please run a filesystem check with the option --repair to fix them.\n");
9769                 ret = 1;
9770                 goto close_out;
9771         }
9772
9773         if (!ctx.progress_enabled) {
9774                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9775                         fprintf(stderr, "checking free space tree\n");
9776                 else
9777                         fprintf(stderr, "checking free space cache\n");
9778         }
9779         ret = check_space_cache(root);
9780         if (ret)
9781                 goto out;
9782
9783         /*
9784          * We used to have to have these hole extents in between our real
9785          * extents so if we don't have this flag set we need to make sure there
9786          * are no gaps in the file extents for inodes, otherwise we can just
9787          * ignore it when this happens.
9788          */
9789         no_holes = btrfs_fs_incompat(root->fs_info,
9790                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9791         if (!ctx.progress_enabled)
9792                 fprintf(stderr, "checking fs roots\n");
9793         ret = check_fs_roots(root, &root_cache);
9794         if (ret)
9795                 goto out;
9796
9797         fprintf(stderr, "checking csums\n");
9798         ret = check_csums(root);
9799         if (ret)
9800                 goto out;
9801
9802         fprintf(stderr, "checking root refs\n");
9803         ret = check_root_refs(root, &root_cache);
9804         if (ret)
9805                 goto out;
9806
9807         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9808                 struct extent_buffer *eb;
9809
9810                 eb = list_first_entry(&root->fs_info->recow_ebs,
9811                                       struct extent_buffer, recow);
9812                 list_del_init(&eb->recow);
9813                 ret = recow_extent_buffer(root, eb);
9814                 if (ret)
9815                         break;
9816         }
9817
9818         while (!list_empty(&delete_items)) {
9819                 struct bad_item *bad;
9820
9821                 bad = list_first_entry(&delete_items, struct bad_item, list);
9822                 list_del_init(&bad->list);
9823                 if (repair)
9824                         ret = delete_bad_item(root, bad);
9825                 free(bad);
9826         }
9827
9828         if (info->quota_enabled) {
9829                 int err;
9830                 fprintf(stderr, "checking quota groups\n");
9831                 err = qgroup_verify_all(info);
9832                 if (err)
9833                         goto out;
9834         }
9835
9836         if (!list_empty(&root->fs_info->recow_ebs)) {
9837                 fprintf(stderr, "Transid errors in file system\n");
9838                 ret = 1;
9839         }
9840 out:
9841         /* Don't override original ret */
9842         if (ret)
9843                 report_qgroups(0);
9844         else
9845                 ret = report_qgroups(0);
9846         if (found_old_backref) { /*
9847                  * there was a disk format change when mixed
9848                  * backref was in testing tree. The old format
9849                  * existed about one week.
9850                  */
9851                 printf("\n * Found old mixed backref format. "
9852                        "The old format is not supported! *"
9853                        "\n * Please mount the FS in readonly mode, "
9854                        "backup data and re-format the FS. *\n\n");
9855                 ret = 1;
9856         }
9857         printf("found %llu bytes used err is %d\n",
9858                (unsigned long long)bytes_used, ret);
9859         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9860         printf("total tree bytes: %llu\n",
9861                (unsigned long long)total_btree_bytes);
9862         printf("total fs tree bytes: %llu\n",
9863                (unsigned long long)total_fs_tree_bytes);
9864         printf("total extent tree bytes: %llu\n",
9865                (unsigned long long)total_extent_tree_bytes);
9866         printf("btree space waste bytes: %llu\n",
9867                (unsigned long long)btree_space_waste);
9868         printf("file data blocks allocated: %llu\n referenced %llu\n",
9869                 (unsigned long long)data_bytes_allocated,
9870                 (unsigned long long)data_bytes_referenced);
9871
9872         free_root_recs_tree(&root_cache);
9873 close_out:
9874         close_ctree(root);
9875 err_out:
9876         if (ctx.progress_enabled)
9877                 task_deinit(ctx.info);
9878
9879         return ret;
9880 }